#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "pipeliner"

STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
STATISTIC(NumPipelined, "Number of loops software pipelined");
STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
                               cl::desc("Enable Software Pipelining"));

static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
                              cl::desc("Size limit for the MII."), cl::Hidden,
                              cl::init(27));

static cl::opt<int> SwpForceII("pipeliner-force-ii",
                               cl::desc("Force pipeliner to use specified II."),
                               cl::Hidden, cl::init(-1));

static cl::opt<int> SwpMaxStages(
    "pipeliner-max-stages",
    cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden,
    cl::init(3));

static cl::opt<bool>
    SwpPruneDeps("pipeliner-prune-deps",
                 cl::desc("Prune dependences between unrelated Phi nodes."),
                 cl::Hidden, cl::init(true));

static cl::opt<bool>
    SwpPruneLoopCarried("pipeliner-prune-loop-carried",
                        cl::desc("Prune loop carried order dependences."),
                        cl::Hidden, cl::init(true));

static cl::opt<bool> EmitTestAnnotations(
    "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
    cl::desc("Instead of emitting the pipelined code, annotate instructions "
             "with the generated schedule for feeding into the "
             "-modulo-schedule-test pass"));

static cl::opt<bool> ExperimentalCodeGen(
    "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
    cl::desc(
        "Use the experimental peeling code generator for software pipelining"));

static cl::opt<bool> LimitRegPressure(
    "pipeliner-register-pressure", cl::Hidden, cl::init(false),
    cl::desc("Limit register pressure of scheduled loop"));

static cl::opt<int>
    RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden,
                      cl::init(5),
                      cl::desc("Margin representing the unused percentage of "
                               "the register pressure limit"));

static cl::opt<bool>
    MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
               cl::desc("Use the MVE code generator for software pipelining"));

static cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi",
                                        cl::ReallyHidden, cl::init(true),
                                        cl::desc("Enable CopyToPhi DAG Mutation"));

static cl::opt<int> SwpForceIssueWidth(
    "pipeliner-force-issue-width",
    cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
    cl::init(-1));

static cl::opt<WindowSchedulingFlag> WindowSchedulingOption(
    "window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On),
    cl::desc("Set how to use window scheduling algorithm."),
    cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off",
                          "Turn off window algorithm."),
               clEnumValN(WindowSchedulingFlag::WS_On, "on",
                          "Use window algorithm after SMS algorithm fails."),
               clEnumValN(WindowSchedulingFlag::WS_Force, "force",
                          "Use window algorithm instead of SMS algorithm.")));
unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;

                    "Modulo Software Pipelining", false, false)

bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(mf.getFunction()))
    return false;

  if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
      !EnableSWPOptSize.getPosition())
    return false;

  if (!mf.getSubtarget().enableMachinePipeliner())
    return false;

  if (mf.getSubtarget().useDFAforSMS() &&
      (!mf.getSubtarget().getInstrItineraryData() ||
       mf.getSubtarget().getInstrItineraryData()->isEmpty()))
    return false;

  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
  TII = MF->getSubtarget().getInstrInfo();
  RegClassInfo.runOnMachineFunction(*MF);

  for (const auto &L : *MLI)
bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
  bool Changed = false;
  for (const auto &InnerLoop : L)
    Changed |= scheduleLoop(*InnerLoop);

  setPragmaPipelineOptions(L);
  if (!canPipelineLoop(L)) {
                                             L.getStartLoc(), L.getHeader())
             << "Failed to pipeline loop";
  }

  if (useSwingModuloScheduler())
    Changed = swingModuloScheduler(L);

  if (useWindowScheduler(Changed))
    Changed = runWindowScheduler(L);
void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
  if (LoopID == nullptr)
    return;

    MDNode *MD = dyn_cast<MDNode>(MDO);

    if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
      assert(MD->getNumOperands() == 2 &&
             "Pipeline initiation interval hint metadata should have two operands.");
      II_setByPragma =
          mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
    } else if (S->getString() == "llvm.loop.pipeline.disable") {
bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
  if (L.getNumBlocks() != 1) {
                                             L.getStartLoc(), L.getHeader())
             << "Not a single basic block: "
             << ore::NV("NumBlocks", L.getNumBlocks());
  }

                                             L.getStartLoc(), L.getHeader())
             << "Disabled by Pragma.";

    LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
                                             L.getStartLoc(), L.getHeader())
             << "The branch can't be understood";

    LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
                                             L.getStartLoc(), L.getHeader())
             << "The loop structure is not supported";

  if (!L.getLoopPreheader()) {
    LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
                                             L.getStartLoc(), L.getHeader())
             << "No loop preheader found";
  }

  preprocessPhiNodes(*L.getHeader());

      *getAnalysis<LiveIntervalsWrapperPass>().getLIS().getSlotIndexes();

    auto *RC = MRI.getRegClass(DefOp.getReg());

    for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");

  SwingSchedulerDAG SMS(*this, L,
                        getAnalysis<LiveIntervalsWrapperPass>().getLIS(),
                        RegClassInfo,

  return SMS.hasNewSchedule();
bool MachinePipeliner::runWindowScheduler(MachineLoop &L) {
  Context.PassConfig = &getAnalysis<TargetPassConfig>();
  Context.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  Context.LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();

bool MachinePipeliner::useSwingModuloScheduler() {

bool MachinePipeliner::useWindowScheduler(bool Changed) {
                         "llvm.loop.pipeline.initiationinterval is set.\n");
void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
  if (SwpForceII > 0)
    MII = SwpForceII;
  else if (II_setByPragma > 0)
    MII = II_setByPragma;
  else
    MII = std::max(ResMII, RecMII);
}

void SwingSchedulerDAG::setMAX_II() {
  if (SwpForceII > 0)
    MAX_II = SwpForceII;
  else if (II_setByPragma > 0)
    MAX_II = II_setByPragma;
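// Worked example (illustrative): with ResMII = 3 and RecMII = 5 and neither
// -pipeliner-force-ii nor the loop pragma set, the search starts at
// MII = max(3, 5) = 5 and may relax the candidate II upward one cycle at a
// time until MAX_II (bounded by the -pipeliner-ii-search-range value, 10 by
// default) is exceeded.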
void SwingSchedulerDAG::schedule() {
  addLoopCarriedDependences(AA);
  updatePhiDependences();

  findCircuits(NodeSets);

  unsigned ResMII = calculateResMII();
  unsigned RecMII = calculateRecMII(NodeSets);

  setMII(ResMII, RecMII);

  LLVM_DEBUG(dbgs() << " (rec=" << RecMII << ", res=" << ResMII << ")\n");

  if (MII == 0) {
    Pass.ORE->emit([&]() {
             << "Invalid Minimal Initiation Interval: 0";
    });
  }

  LLVM_DEBUG(dbgs() << ", we don't pipeline large loops\n");
  NumFailLargeMaxMII++;
  Pass.ORE->emit([&]() {
           << "Minimal Initiation Interval too large: "
           << ore::NV("MII", (int)MII) << " > "
           << "Refer to -pipeliner-max-mii.";
  });

  computeNodeFunctions(NodeSets);

  registerPressureFilter(NodeSets);

  colocateNodeSets(NodeSets);

  checkNodeSets(NodeSets);

  for (auto &I : NodeSets) {
    dbgs() << " Rec NodeSet ";
    I.dump();
  }

  groupRemainingNodes(NodeSets);

  removeDuplicateNodes(NodeSets);

  for (auto &I : NodeSets) {
    dbgs() << " NodeSet ";
    I.dump();
  }

  computeNodeOrder(NodeSets);

  checkValidNodeOrder(Circuits);

  Scheduled = schedulePipeline(Schedule);

  Pass.ORE->emit([&]() {
           << "Unable to find schedule";
  });

  if (numStages == 0) {
    Pass.ORE->emit([&]() {
             << "No need to pipeline - no overlapped iterations in schedule.";
    });
  }

  LLVM_DEBUG(dbgs() << " : too many stages, abort\n");
  NumFailLargeMaxStage++;
  Pass.ORE->emit([&]() {
           << "Too many stages in schedule: "
           << ore::NV("numStages", (int)numStages) << " > "
           << ". Refer to -pipeliner-max-stages.";
  });

  Pass.ORE->emit([&]() {
           << "Pipelined succesfully!";
  });
  std::vector<MachineInstr *> OrderedInsts;
      OrderedInsts.push_back(SU->getInstr());
      Cycles[SU->getInstr()] = Cycle;

    for (auto &KV : NewMIs) {
      Cycles[KV.first] = Cycles[KV.second];
      Stages[KV.first] = Stages[KV.second];
      NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
    }

           "Cannot serialize a schedule with InstrChanges!");

    for (auto &KV : NewMIs)
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
                       unsigned &InitVal, unsigned &LoopVal) {
  assert(Phi.isPHI() && "Expecting a Phi.");

  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
    if (Phi.getOperand(i + 1).getMBB() != Loop)
      InitVal = Phi.getOperand(i).getReg();
    else
      LoopVal = Phi.getOperand(i).getReg();

  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
}

static unsigned getLoopPhiReg(const MachineInstr &Phi,
                              const MachineBasicBlock *LoopBB) {
  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
    if (Phi.getOperand(i + 1).getMBB() == LoopBB)
      return Phi.getOperand(i).getReg();
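// Illustrative sketch (hypothetical register numbers): machine-level PHIs list
// (value, predecessor-block) pairs after the result operand, which is why the
// loops above step by two starting at operand 1. For a loop-header PHI
//
//   %5:intregs = PHI %20, %bb.preheader, %7, %bb.loop
//
// getPhiRegs() returns InitVal = %20 (incoming from outside the loop) and
// LoopVal = %7 (incoming along the back edge); getLoopPhiReg() returns %7.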
static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
  while (!Worklist.empty()) {
    for (const auto &SI : SU->Succs) {
      SUnit *SuccSU = SI.getSUnit();
      if (Visited.count(SuccSU))
        continue;

static bool isDependenceBarrier(MachineInstr &MI) {
  return MI.isCall() || MI.mayRaiseFPException() ||
         MI.hasUnmodeledSideEffects() ||
         (MI.hasOrderedMemoryRef() &&
          (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad()));
}

  if (!MI->hasOneMemOperand())

  for (const Value *V : Objs) {
void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
      PendingLoads.clear();
    else if (MI.mayLoad()) {
      for (const auto *V : Objs) {
    } else if (MI.mayStore()) {
      for (const auto *V : Objs) {
            PendingLoads.find(V);
        if (I == PendingLoads.end())
        for (auto *Load : I->second) {
          int64_t Offset1, Offset2;
          bool Offset1IsScalable, Offset2IsScalable;
                                        Offset1IsScalable, TRI) &&
                                        Offset2IsScalable, TRI)) {
                Offset1IsScalable == Offset2IsScalable &&
                (int)Offset1 < (int)Offset2) {
                     "What happened to the chain edge?");
void SwingSchedulerDAG::updatePhiDependences() {
    unsigned HasPhiUse = 0;
    unsigned HasPhiDef = 0;
          if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
      } else if (MO.isUse()) {
        if (DefMI == nullptr)
          ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep,
          if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
    for (auto &PI : I.Preds) {
      if (I.getInstr()->isPHI()) {
    for (const SDep &D : RemoveDeps)
void SwingSchedulerDAG::changeDependences() {
    unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
    int64_t NewOffset = 0;
    if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,

    Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();

    for (const SDep &P : I.Preds)
      if (P.getSUnit() == DefSU)
    for (const SDep &D : Deps) {
    for (auto &P : LastSU->Preds)
    for (const SDep &D : Deps) {

    InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
static void computeScheduledInsts(const SwingSchedulerDAG *SSD,
                                  SMSchedule &Schedule,
                                  std::vector<MachineInstr *> &OrderedInsts,
                                  DenseMap<MachineInstr *, unsigned> &Stages) {
       Stage <= LastStage; ++Stage) {
      Instrs[Cycle].push_front(SU);

    std::deque<SUnit *> &CycleInstrs = Instrs[Cycle];
    for (SUnit *SU : CycleInstrs) {
      OrderedInsts.push_back(MI);
struct FuncUnitSorter {
      : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}

    unsigned min = UINT_MAX;
    if (InstrItins && !InstrItins->isEmpty()) {
                               InstrItins->endStage(SchedClass))) {
        if (numAlternatives < min) {
          min = numAlternatives;
        if (!PRE.ReleaseAtCycle)
        unsigned NumUnits = ProcResource->NumUnits;
        if (NumUnits < min) {
          F = PRE.ProcResourceIdx;
    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

    unsigned SchedClass = MI.getDesc().getSchedClass();
    if (InstrItins && !InstrItins->isEmpty()) {
                               InstrItins->endStage(SchedClass))) {
        Resources[FuncUnits]++;
        if (!PRE.ReleaseAtCycle)
        Resources[PRE.ProcResourceIdx]++;
    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

    unsigned MFUs1 = minFuncUnits(IS1, F1);
    unsigned MFUs2 = minFuncUnits(IS2, F2);
    return MFUs1 > MFUs2;
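// Illustrative note: minFuncUnits() reports how many alternative functional
// units the most constrained resource of an instruction offers, so with the
// comparator above a PriorityQueue hands out the instruction with the fewest
// alternatives first; the DFA-based ResMII computation further below relies on
// this to reserve the scarcest units before the flexible ones.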
class HighRegisterPressureDetector {
  const unsigned PSetNum;

  std::vector<unsigned> InitSetPressure;

  std::vector<unsigned> PressureSetLimit;

  using OrderedInstsTy = std::vector<MachineInstr *>;

  static void dumpRegisterPressures(const std::vector<unsigned> &Pressures) {
    if (Pressures.size() == 0) {
    for (unsigned P : Pressures) {

      for (auto PSetIter = MRI.getPressureSets(Reg); PSetIter.isValid();
        dbgs() << *PSetIter << ' ';

  void increaseRegisterPressure(std::vector<unsigned> &Pressure,
    auto PSetIter = MRI.getPressureSets(Reg);
    unsigned Weight = PSetIter.getWeight();
    for (; PSetIter.isValid(); ++PSetIter)
      Pressure[*PSetIter] += Weight;

  void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
    auto PSetIter = MRI.getPressureSets(Reg);
    unsigned Weight = PSetIter.getWeight();
    for (; PSetIter.isValid(); ++PSetIter) {
      auto &P = Pressure[*PSetIter];
             "register pressure must be greater than or equal weight");

    return Reg.isPhysical() && TRI->isFixedRegister(MF, Reg.asMCReg());

    return Reg.isVirtual() && MRI.getVRegDef(Reg)->getParent() == OrigMBB;

  void computeLiveIn() {
    for (auto &MI : *OrigMBB) {
      if (MI.isDebugInstr())
        if (isFixedRegister(Reg))
        if (isDefinedInThisLoop(Reg))

    for (auto LiveIn : Used)
      increaseRegisterPressure(InitSetPressure, LiveIn);

    for (unsigned PSet = 0; PSet < PSetNum; PSet++)
      PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet);

      if (isFixedRegister(Reg))

    for (auto Reg : FixedRegs) {
        const int *Sets = TRI->getRegUnitPressureSets(Reg);
        for (; *Sets != -1; Sets++) {
          dbgs() << TRI->getRegPressureSetName(*Sets) << ", ";

    for (auto Reg : FixedRegs) {
      auto PSetIter = MRI.getPressureSets(Reg);
      unsigned Weight = PSetIter.getWeight();
      for (; PSetIter.isValid(); ++PSetIter) {
        unsigned &Limit = PressureSetLimit[*PSetIter];
        assert(Limit >= Weight &&
               "register pressure limit must be greater than or equal weight");
                          << " (decreased by " << Weight << ")\n");
  Instr2LastUsesTy computeLastUses(const OrderedInstsTy &OrderedInsts,
                                   Instr2StageTy &Stages) const {
    const auto UpdateTargetRegs = [this, &TargetRegs](Register Reg) {
      if (isDefinedInThisLoop(Reg))
        UpdateTargetRegs(Reg);
      for (auto &Use : ROMap.find(MI)->getSecond().Uses)
        UpdateTargetRegs(Use.RegUnit);

      return Stages[MI] + MI->isPHI();

      for (auto &Use : ROMap.find(MI)->getSecond().Uses) {
        auto Ite = LastUseMI.find(Reg);
        if (Ite == LastUseMI.end()) {
          LastUseMI[Reg] = MI;
          if (InstrScore(Orig) < InstrScore(New))
            LastUseMI[Reg] = New;

    Instr2LastUsesTy LastUses;
    for (auto &Entry : LastUseMI)

  std::vector<unsigned>
  computeMaxSetPressure(const OrderedInstsTy &OrderedInsts,
                        Instr2StageTy &Stages,
                        const unsigned StageCount) const {
    auto CurSetPressure = InitSetPressure;
    auto MaxSetPressure = InitSetPressure;
    auto LastUses = computeLastUses(OrderedInsts, Stages);

      dbgs() << "Ordered instructions:\n";
        dbgs() << "Stage " << Stages[MI] << ": ";

    const auto InsertReg = [this, &CurSetPressure](RegSetTy &RegSet,
      if (!Reg.isValid() || isFixedRegister(Reg))
      increaseRegisterPressure(CurSetPressure, Reg);

    const auto EraseReg = [this, &CurSetPressure](RegSetTy &RegSet,
      if (!Reg.isValid() || isFixedRegister(Reg))
      if (!RegSet.contains(Reg))
      decreaseRegisterPressure(CurSetPressure, Reg);

    for (unsigned I = 0; I < StageCount; I++) {
        const auto Stage = Stages[MI];
        const unsigned Iter = I - Stage;

        for (auto &Def : ROMap.find(MI)->getSecond().Defs)
          InsertReg(LiveRegSets[Iter], Def.RegUnit);

        for (auto LastUse : LastUses[MI]) {
            EraseReg(LiveRegSets[Iter - 1], LastUse);
            EraseReg(LiveRegSets[Iter], LastUse);
        }

        for (unsigned PSet = 0; PSet < PSetNum; PSet++)
          MaxSetPressure[PSet] =
              std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);

        dbgs() << "CurSetPressure=";
        dumpRegisterPressures(CurSetPressure);
        dbgs() << " iter=" << Iter << " stage=" << Stage << ":";
    }

    return MaxSetPressure;
      : OrigMBB(OrigMBB), MF(MF), MRI(MF.getRegInfo()),
        TRI(MF.getSubtarget().getRegisterInfo()),
        PSetNum(TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
        PressureSetLimit(PSetNum, 0) {}

      if (MI.isDebugInstr())
      ROMap[&MI].collect(MI, *TRI, MRI, false, true);

    computePressureSetLimit(RCI);

              const unsigned MaxStage) const {
           "the percentage of the margin must be between 0 to 100");

    OrderedInstsTy OrderedInsts;
    Instr2StageTy Stages;

    const auto MaxSetPressure =
        computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);

      dbgs() << "Dump MaxSetPressure:\n";
      for (unsigned I = 0; I < MaxSetPressure.size(); I++) {
        dbgs() << format("MaxSetPressure[%d]=%d\n", I, MaxSetPressure[I]);
      }

    for (unsigned PSet = 0; PSet < PSetNum; PSet++) {
      unsigned Limit = PressureSetLimit[PSet];
                        << " Margin=" << Margin << "\n");
      if (Limit < MaxSetPressure[PSet] + Margin) {
            << "Rejected the schedule because of too high register pressure\n");
unsigned SwingSchedulerDAG::calculateResMII() {
  return RM.calculateResMII();
}

unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
  unsigned RecMII = 0;

  for (NodeSet &Nodes : NodeSets) {
    unsigned Delay = Nodes.getLatency();
    unsigned Distance = 1;

    unsigned CurMII = (Delay + Distance - 1) / Distance;
    Nodes.setRecMII(CurMII);
    if (CurMII > RecMII)
      RecMII = CurMII;
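// Worked example (illustrative): with the conservative Distance = 1 used
// above, a recurrence whose accumulated latency (Delay) is 7 constrains the
// initiation interval to ceil(7 / 1) = 7; in general the bound is
// ceil(Delay / Distance), which is what (Delay + Distance - 1) / Distance
// computes, and RecMII is the maximum of this value over all recurrences
// found by findCircuits().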
static void swapAntiDependences(std::vector<SUnit> &SUnits) {
  for (SUnit &SU : SUnits) {
        DepsAdded.push_back(std::make_pair(&SU, Pred));
  }
  for (std::pair<SUnit *, SDep> &P : DepsAdded) {
    SUnit *TargetSU = D.getSUnit();
    unsigned Reg = D.getReg();
    unsigned Lat = D.getLatency();
void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
    SwingSchedulerDAG *DAG) {
  for (int i = 0, e = SUnits.size(); i != e; ++i) {
    for (auto &SI : SUnits[i].Succs) {
        int N = SI.getSUnit()->NodeNum;
        auto Dep = OutputDeps.find(BackEdge);
        if (Dep != OutputDeps.end()) {
          BackEdge = Dep->second;
          OutputDeps.erase(Dep);
        }
        OutputDeps[N] = BackEdge;

      if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
          (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
        continue;
      int N = SI.getSUnit()->NodeNum;
        AdjK[i].push_back(N);

    for (auto &PI : SUnits[i].Preds) {
      if (!SUnits[i].getInstr()->mayStore() ||
      if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
        int N = PI.getSUnit()->NodeNum;
          AdjK[i].push_back(N);

  for (auto &OD : OutputDeps)
    if (!Added.test(OD.second)) {
      AdjK[OD.first].push_back(OD.second);
      Added.set(OD.second);
    }

bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
  for (auto W : AdjK[V]) {
    if (NumPaths > MaxPaths)
      break;
    } else if (!Blocked.test(W)) {
      if (circuit(W, S, NodeSets,
                  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))

  for (auto W : AdjK[V]) {

void SwingSchedulerDAG::Circuits::unblock(int U) {
  while (!BU.empty()) {
    assert(SI != BU.end() && "Invalid B set.");
    if (Blocked.test(W->NodeNum))
      unblock(W->NodeNum);

void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
  Circuits Cir(SUnits, Topo);

  Cir.createAdjacencyStructure(this);
  for (int i = 0, e = SUnits.size(); i != e; ++i) {
    Cir.circuit(i, i, NodeSets);
    for (auto &Dep : SU.Preds) {
      SUnit *TmpSU = Dep.getSUnit();

    if (PHISUs.size() == 0 || SrcSUs.size() == 0)
      continue;

      for (auto &Dep : PHISUs[Index]->Succs) {
        SUnit *TmpSU = Dep.getSUnit();

    if (UseSUs.size() == 0)
      continue;

    for (auto *I : UseSUs) {
      for (auto *Src : SrcSUs) {

static bool ignoreDependence(const SDep &D, bool isPred) {
  if (D.isArtificial() || D.getSUnit()->isBoundaryNode())
    return true;
void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
  ScheduleInfo.resize(SUnits.size());

  for (int I : Topo) {

  for (int I : Topo) {
    int zeroLatencyDepth = 0;
      if (P.getLatency() == 0)
      asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() -
    maxASAP = std::max(maxASAP, asap);
    ScheduleInfo[I].ASAP = asap;
    ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;

    int zeroLatencyHeight = 0;
    ScheduleInfo[I].ALAP = alap;
    ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;

    I.computeNodeSetInfo(this);

    for (unsigned i = 0; i < SUnits.size(); i++) {
      dbgs() << "\tNode " << i << ":\n";
      if (S && S->count(Pred.getSUnit()) == 0)
      if (S && S->count(Succ.getSUnit()) == 0)
  return !Preds.empty();

      if (S && S->count(Succ.getSUnit()) == 0)
      if (S && S->count(Pred.getSUnit()) == 0)
  return !Succs.empty();

static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
                        SetVector<SUnit *> &DestNodes,
                        SetVector<SUnit *> &Exclude,
                        SmallPtrSet<SUnit *, 8> &Visited) {
  if (!Visited.insert(Cur).second)
    return Path.contains(Cur);
  bool FoundPath = false;
  for (auto &SI : Cur->Succs)
        computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
  for (auto &PI : Cur->Preds)
        computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
  for (SUnit *SU : NS) {
      if (Reg.isVirtual())
      else if (MRI.isAllocatable(Reg))

  for (SUnit *SU : NS)
      if (Reg.isVirtual()) {
        if (!Uses.count(Reg))
      } else if (MRI.isAllocatable(Reg)) {
          if (!Uses.count(Unit))

void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
  for (auto &NS : NodeSets) {
    RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
    RecRPTracker.closeBottom();

    std::vector<SUnit *> SUnits(NS.begin(), NS.end());
      return A->NodeNum > B->NodeNum;

    for (auto &SU : SUnits) {
      RecRPTracker.setPos(std::next(CurInstI));

      RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
        dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
        NS.setExceedPressure(SU);
      RecRPTracker.recede();
void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
  unsigned Colocate = 0;
  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
    for (int j = i + 1; j < e; ++j) {

void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
  for (auto &NS : NodeSets) {
    if (NS.getRecMII() > 2)
      return;
    if (NS.getMaxDepth() > MII)
      return;
void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
    NodesAdded.insert(I.begin(), I.end());

      addConnectedNodes(I, NewSet, NodesAdded);
      if (!NewSet.empty())
        NodeSets.push_back(NewSet);

      addConnectedNodes(I, NewSet, NodesAdded);
      if (!NewSet.empty())
        NodeSets.push_back(NewSet);

    if (NodesAdded.count(&SU) == 0) {
      addConnectedNodes(&SU, NewSet, NodesAdded);
      if (!NewSet.empty())
        NodeSets.push_back(NewSet);

void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
  for (auto &SI : SU->Succs) {
    if (!SI.isArtificial() && !Successor->isBoundaryNode() &&
      addConnectedNodes(Successor, NewSet, NodesAdded);
  }
  for (auto &PI : SU->Preds) {
    SUnit *Predecessor = PI.getSUnit();
    if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
      addConnectedNodes(Predecessor, NewSet, NodesAdded);
  }

  for (SUnit *SU : Set1) {
    if (Set2.count(SU) != 0)
  }
  return !Result.empty();

void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
      for (SUnit *SU : *J)

void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
      J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
  for (auto &Nodes : NodeSets) {
        R.insert(N.begin(), N.end());
        R.insert(N.begin(), N.end());
    } else if (NodeSets.size() == 1) {
      for (const auto &N : Nodes)
        if (N->Succs.size() == 0)
      SUnit *maxASAP = nullptr;
      for (SUnit *SU : Nodes) {

    while (!R.empty()) {
      if (Order == TopDown) {
        while (!R.empty()) {
          SUnit *maxHeight = nullptr;
          NodeOrder.insert(maxHeight);
          R.remove(maxHeight);
          for (const auto &I : maxHeight->Succs) {
            if (Nodes.count(I.getSUnit()) == 0)
              continue;
            if (NodeOrder.contains(I.getSUnit()))
              continue;
            R.insert(I.getSUnit());
          }
          for (const auto &I : maxHeight->Preds) {
            if (Nodes.count(I.getSUnit()) == 0)
              continue;
            if (NodeOrder.contains(I.getSUnit()))
              continue;
            R.insert(I.getSUnit());
          }
        }
          if (pred_L(NodeOrder, N, &Nodes))
            R.insert(N.begin(), N.end());
      } else {
        while (!R.empty()) {
          SUnit *maxDepth = nullptr;
          NodeOrder.insert(maxDepth);
          if (Nodes.isExceedSU(maxDepth)) {
            R.insert(Nodes.getNode(0));
          }
          for (const auto &I : maxDepth->Preds) {
            if (Nodes.count(I.getSUnit()) == 0)
              continue;
            if (NodeOrder.contains(I.getSUnit()))
              continue;
            R.insert(I.getSUnit());
          }
          for (const auto &I : maxDepth->Succs) {
            if (Nodes.count(I.getSUnit()) == 0)
              continue;
            if (NodeOrder.contains(I.getSUnit()))
              continue;
            R.insert(I.getSUnit());
          }
        }
          if (succ_L(NodeOrder, N, &Nodes))
            R.insert(N.begin(), N.end());
      }
    }

    dbgs() << "Node order: ";
    for (SUnit *I : NodeOrder)
      dbgs() << " " << I->NodeNum << " ";
bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
  if (NodeOrder.empty()) {
    return false;
  }

  bool scheduleFound = false;
  std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
    HRPDetector->init(RegClassInfo);

  for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
      int EarlyStart = INT_MIN;
      int LateStart = INT_MAX;

      LLVM_DEBUG(dbgs() << format("\tes: %8x ls: %8x\n", EarlyStart, LateStart));

      if (EarlyStart > LateStart)
        scheduleFound = false;
      else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
        scheduleFound =
            Schedule.insert(SU, EarlyStart, EarlyStart + (int)II - 1, II);
      else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
        scheduleFound =
            Schedule.insert(SU, LateStart, LateStart - (int)II + 1, II);
      else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
        LateStart = std::min(LateStart, EarlyStart + (int)II - 1);
          scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II);
          scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II);
      } else {
        scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
      }

        scheduleFound = false;

      dbgs() << "\tCan't schedule\n";
    } while (++NI != NE && scheduleFound);

  if (scheduleFound) {

  if (scheduleFound) {
    Pass.ORE->emit([&]() {
             << "Schedule found with Initiation Interval: "
             << ", MaxStageCount: "
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
  bool OffsetIsScalable;
  if (OffsetIsScalable)
    return false;
  if (!BaseOp->isReg())
    return false;
  if (BaseDef && BaseDef->isPHI()) {

                                              unsigned &OffsetPos,
  unsigned BasePosLd, OffsetPosLd;
  Register BaseReg = MI->getOperand(BasePosLd).getReg();
  if (!Phi || !Phi->isPHI())
    return false;
  if (!PrevDef || PrevDef == MI)
    return false;

  unsigned BasePos1 = 0, OffsetPos1 = 0;

  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();

  BasePos = BasePosLd;
  OffsetPos = OffsetPosLd;

      InstrChanges.find(SU);
  if (It != InstrChanges.end()) {
    std::pair<unsigned, int64_t> RegAndOffset = It->second;
    unsigned BasePos, OffsetPos;
    Register BaseReg = MI->getOperand(BasePos).getReg();
    if (BaseStageNum < DefStageNum) {
      int OffsetDiff = DefStageNum - BaseStageNum;
      if (DefCycleNum < BaseCycleNum) {
          MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;

  while (Def->isPHI()) {
    if (!Visited.insert(Def).second)
    for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
      if (Def->getOperand(i + 1).getMBB() == BB) {

  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");

  unsigned DeltaS, DeltaD;
  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
    return true;

  int64_t OffsetS, OffsetD;
  bool OffsetSIsScalable, OffsetDIsScalable;

  assert(!OffsetSIsScalable && !OffsetDIsScalable &&
         "Expected offsets to be byte offsets");

  if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI())
    return true;

  unsigned InitValS = 0;
  unsigned LoopValS = 0;
  unsigned InitValD = 0;
  unsigned LoopValD = 0;

  if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue() ||
  return (OffsetS + (int64_t)AccessSizeS.getValue() <
          OffsetD + (int64_t)AccessSizeD.getValue());
void SwingSchedulerDAG::postProcessDAG() {
  for (auto &M : Mutations)
    M->apply(this);

bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
  bool forward = true;
  LLVM_DEBUG(dbgs() << "Trying to insert node between " << StartCycle << " and "
                    << EndCycle << " II: " << II << "\n");
  if (StartCycle > EndCycle)
    forward = false;

  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
  for (int curCycle = StartCycle; curCycle != termCycle;
       forward ? ++curCycle : --curCycle) {

      LLVM_DEBUG(dbgs() << "\tinsert at cycle " << curCycle << " ");

      ProcItinResources.reserveResources(*SU, curCycle);
      ScheduledInstrs[curCycle].push_back(SU);
      InstrToCycle.insert(std::make_pair(SU, curCycle));
      if (curCycle > LastCycle)
        LastCycle = curCycle;
      if (curCycle < FirstCycle)
        FirstCycle = curCycle;

      LLVM_DEBUG(dbgs() << "\tfailed to insert at cycle " << curCycle << " ");
  int EarlyCycle = INT_MAX;
  while (!Worklist.empty()) {
    if (Visited.count(PrevSU))
      continue;
    std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
    if (it == InstrToCycle.end())
      continue;
    EarlyCycle = std::min(EarlyCycle, it->second);
    for (const auto &PI : PrevSU->Preds)

  int LateCycle = INT_MIN;
  while (!Worklist.empty()) {
    std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
    if (it == InstrToCycle.end())
      continue;
    LateCycle = std::max(LateCycle, it->second);
    for (const auto &SI : SuccSU->Succs)

static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
  for (auto &P : SU->Preds)
    if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
      for (auto &S : P.getSUnit()->Succs)
          return P.getSUnit();

void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
                              int II, SwingSchedulerDAG *DAG) {
  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
      for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
          *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
            *MinLateStart = std::min(*MinLateStart, End);
          *MinLateStart = std::min(*MinLateStart, LateStart);
          *MinLateStart = std::min(*MinLateStart, cycle);
      for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
        if (SU->Succs[i].getSUnit() == I) {
            *MinLateStart = std::min(*MinLateStart, LateStart);
            *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
          *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
                                 std::deque<SUnit *> &Insts) const {
  bool OrderBeforeUse = false;
  bool OrderAfterDef = false;
  bool OrderBeforeDef = false;
  unsigned MoveDef = 0;
  unsigned MoveUse = 0;

  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
      if (!MO.isReg() || !MO.getReg().isVirtual())
        continue;

      unsigned BasePos, OffsetPos;
      if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
        if (MI->getOperand(BasePos).getReg() == Reg)

      std::tie(Reads, Writes) =
          (*I)->getInstr()->readsWritesVirtualRegister(Reg);
        OrderBeforeUse = true;
        OrderAfterDef = true;
        OrderBeforeUse = true;
        OrderAfterDef = true;
        OrderBeforeUse = true;
        OrderAfterDef = true;
        OrderBeforeUse = true;
        OrderBeforeDef = true;

    for (auto &S : SU->Succs) {
        OrderBeforeUse = true;
        OrderBeforeUse = true;
        if ((MoveUse == 0) || (Pos < MoveUse))
    }
    for (auto &P : SU->Preds) {
      if (P.getSUnit() != *I)
        continue;
        OrderAfterDef = true;
    }

  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
    OrderBeforeUse = false;

    OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);

  if (OrderBeforeUse && OrderAfterDef) {
    SUnit *UseSU = Insts.at(MoveUse);
    SUnit *DefSU = Insts.at(MoveDef);
    if (MoveUse > MoveDef) {
      Insts.erase(Insts.begin() + MoveUse);
      Insts.erase(Insts.begin() + MoveDef);
    } else {
      Insts.erase(Insts.begin() + MoveDef);
      Insts.erase(Insts.begin() + MoveUse);
    }

    Insts.push_front(SU);
  else
    Insts.push_back(SU);

bool SMSchedule::isLoopCarried(const SwingSchedulerDAG *SSD,
                               MachineInstr &Phi) const {
  assert(Phi.isPHI() && "Expecting a Phi.");
  unsigned InitVal = 0;
  unsigned LoopVal = 0;
  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
    return false;
    if (DMO.getReg() == LoopReg)

SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
    SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
  for (auto &SU : SSD->SUnits)
  while (!Worklist.empty()) {
    if (DoNotPipeline.count(SU))
      continue;
    DoNotPipeline.insert(SU);
    for (auto &Dep : SU->Preds)
    for (auto &Dep : SU->Succs)
  }
  return DoNotPipeline;

  int NewLastCycle = INT_MIN;
      NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
      for (auto &Dep : SU.Preds)
        NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);

    int OldCycle = InstrToCycle[&SU];
    if (OldCycle != NewCycle) {
      InstrToCycle[&SU] = NewCycle;
                        << ") is not pipelined; moving from cycle " << OldCycle
                        << " to " << NewCycle << " Instr:" << *SU.getInstr());
    }
    NewLastCycle = std::max(NewLastCycle, NewCycle);
  LastCycle = NewLastCycle;

    int CycleDef = InstrToCycle[&SU];
    assert(StageDef != -1 && "Instruction should have been scheduled.");
    for (auto &SI : SU.Succs)
      if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
        if (InstrToCycle[SI.getSUnit()] <= CycleDef)
void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
  typedef std::pair<SUnit *, unsigned> UnitIndex;
  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));

  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
    Indices.push_back(std::make_pair(NodeOrder[i], i));

  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
    return std::get<0>(i1) < std::get<0>(i2);
  };

  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
    SUnit *SU = NodeOrder[i];

    bool PredBefore = false;
    bool SuccBefore = false;

      unsigned PredIndex = std::get<1>(
      unsigned SuccIndex = std::get<1>(

          Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });

      NumNodeOrderIssues++;
                 << " are scheduled before node " << SU->NodeNum

    dbgs() << "Invalid node order found!\n";
  unsigned OverlapReg = 0;
  unsigned NewBaseReg = 0;
  for (SUnit *SU : Instrs) {
    for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
          InstrChanges.find(SU);
      if (It != InstrChanges.end()) {
        unsigned BasePos, OffsetPos;
            MI->getOperand(OffsetPos).getImm() - It->second.second;

      unsigned TiedUseIdx = 0;
      if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
        OverlapReg = MI->getOperand(TiedUseIdx).getReg();
        NewBaseReg = MI->getOperand(i).getReg();
      }

std::deque<SUnit *>
SMSchedule::reorderInstructions(const SwingSchedulerDAG *SSD,
                                const std::deque<SUnit *> &Instrs) const {
  std::deque<SUnit *> NewOrderPhi;
  for (SUnit *SU : Instrs) {
      NewOrderPhi.push_back(SU);
  }
  std::deque<SUnit *> NewOrderI;
  for (SUnit *SU : Instrs) {

      std::deque<SUnit *> &cycleInstrs =
          ScheduledInstrs[cycle + (stage * InitiationInterval)];
        ScheduledInstrs[cycle].push_front(SU);

  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
    ScheduledInstrs.erase(cycle);
  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];

void NodeSet::print(raw_ostream &os) const {
  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
     << " depth " << MaxDepth << " col " << Colocate << "\n";
  for (const auto &I : Nodes)
    os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

    for (SUnit *CI : cycleInstrs->second) {
      os << "(" << CI->NodeNum << ") ";
void ResourceManager::dumpMRT() const {
  std::stringstream SS;
  SS << std::setw(4) << "Slot";
    SS << std::setw(3) << I;
  SS << std::setw(7) << "#Mops"
  for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
    SS << std::setw(4) << Slot;
      SS << std::setw(3) << MRT[Slot][I];
    SS << std::setw(7) << NumScheduledMops[Slot] << "\n";

  unsigned ProcResourceID = 0;
         "Too many kinds of resources, unsupported");
    if (Desc.SubUnitsIdxBegin)
      continue;
    Masks[I] = 1ULL << ProcResourceID;

    if (!Desc.SubUnitsIdxBegin)
      continue;
    Masks[I] = 1ULL << ProcResourceID;
    for (unsigned U = 0; U < Desc.NumUnits; ++U)
      Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];

    dbgs() << "ProcResourceDesc:\n";
      dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
                       ProcResource->Name, I, Masks[I],
    dbgs() << " -----------------\n";

    dbgs() << "canReserveResources:\n";
    return DFAResources[positiveModulo(Cycle, InitiationInterval)]

    dbgs() << "No valid Schedule Class Desc for schedClass!\n";

  reserveResources(SCDesc, Cycle);
  bool Result = !isOverbooked();
  unreserveResources(SCDesc, Cycle);

    dbgs() << "reserveResources:\n";
    return DFAResources[positiveModulo(Cycle, InitiationInterval)]

    dbgs() << "No valid Schedule Class Desc for schedClass!\n";

  reserveResources(SCDesc, Cycle);

    dbgs() << "reserveResources: done!\n\n";

      ++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
    ++NumScheduledMops[positiveModulo(C, InitiationInterval)];

      --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
    --NumScheduledMops[positiveModulo(C, InitiationInterval)];
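// Illustrative sketch: the modulo reservation table (MRT) has one row per slot
// in [0, InitiationInterval). With II = 3, an instruction booked at cycle 7
// lands in slot positiveModulo(7, 3) == 1, so it competes for functional units
// and issue width with everything scheduled at cycles ..., 1, 4, 10, ... that
// maps to the same slot.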
bool ResourceManager::isOverbooked() const {
  for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
      if (MRT[Slot][I] > Desc->NumUnits)
        return true;
    if (NumScheduledMops[Slot] > IssueWidth)
      return true;
  }

int ResourceManager::calculateResMIIDFA() const {
  FuncUnitSorter FUS = FuncUnitSorter(*ST);
    FUS.calcCriticalResources(*SU.getInstr());

  while (!FuncUnitOrder.empty()) {
    FuncUnitOrder.pop();

    unsigned ReservedCycles = 0;
    auto *RI = Resources.begin();
    auto *RE = Resources.end();
    LLVM_DEBUG(dbgs() << "Trying to reserve resource for " << NumCycles
                      << " cycles for \n");
    for (unsigned C = 0; C < NumCycles; ++C)
      if ((*RI)->canReserveResources(*MI)) {
        (*RI)->reserveResources(*MI);
                      << ", NumCycles:" << NumCycles << "\n");
    for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
                        << "NewResource created to reserve resources"
      assert(NewResource->canReserveResources(*MI) && "Reserve error.");
      NewResource->reserveResources(*MI);
      Resources.push_back(std::unique_ptr<DFAPacketizer>(NewResource));
    }

  int Resmii = Resources.size();

  return calculateResMIIDFA();

                    << " WriteProcRes: ";
      dbgs() << Desc->Name << ": " << PRE.ReleaseAtCycle << ", ";
      ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;

  int Result = (NumMops + IssueWidth - 1) / IssueWidth;

    dbgs() << "#Mops: " << NumMops << ", "
           << "IssueWidth: " << IssueWidth << ", "
           << "Cycles: " << Result << "\n";

    std::stringstream SS;
    SS << std::setw(2) << "ID" << std::setw(16) << "Name" << std::setw(10)
       << "Units" << std::setw(10) << "Consumed" << std::setw(10) << "Cycles"

    int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits;

      std::stringstream SS;
      SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10)
         << Desc->NumUnits << std::setw(10) << ResourceCount[I]
         << std::setw(10) << Cycles << "\n";

    if (Cycles > Result)
      Result = Cycles;

void ResourceManager::init(int II) {
  InitiationInterval = II;
  DFAResources.clear();
  DFAResources.resize(II);
  for (auto &I : DFAResources)
    I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));

  NumScheduledMops.clear();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file declares an analysis pass that computes CycleInfo for LLVM IR, specialized from GenericCycl...
This file defines the DenseMap class.
SmallVector< uint32_t, 0 > Writes
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Modulo Software Pipelining
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static bool isDependenceBarrier(MachineInstr &MI)
Return true if the instruction causes a chain between memory references before and after it.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
static unsigned getSize(unsigned Kind)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
Implements a dense probed hash-table based set.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
A possibly irreducible generalization of a Loop.
Itinerary data supplied by a subtarget to be used by a target.
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
BlockT * getHeader() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Generic base class for all target subtargets.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
const MachineOperand & getOperand(unsigned i) const
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
static use_instr_iterator use_instr_end()
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
Pass interface - Implemented by all 'passes'.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Track the current register pressure at some position in the instruction stream, and remember the high...
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Kind
These are the different kinds of scheduling dependencies.
@ Output
A register output-dependence (aka WAW).
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is scheduled or the E...
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, SwingSchedulerDAG *DAG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
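Taken together, the SMSchedule accessors above let a client walk the finished schedule cycle by cycle. A minimal sketch, assuming a populated SMSchedule named Schedule (the loop body is purely illustrative):

  // Illustrative only: visit every scheduled instruction in cycle order.
  for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
       ++Cycle) {
    for (SUnit *SU : Schedule.getInstructions(Cycle)) {
      unsigned C = Schedule.cycleScheduled(SU);   // cycle within one iteration
      int Stage = Schedule.stageScheduled(SU);    // pipeline stage of SU
      (void)C; (void)Stage;
    }
  }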
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
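The SUnit and SDep interfaces above are typically used together when inspecting the dependence graph. A minimal sketch, assuming an SUnit reference SU taken from the SUnits vector:

  // Illustrative only: classify the incoming edges of one node.
  for (const SDep &Dep : SU.Preds) {
    if (Dep.isArtificial())
      continue;                        // edge added only for ordering
    switch (Dep.getKind()) {
    case SDep::Data:                   // true dependence
    case SDep::Anti:                   // WAR
    case SDep::Output:                 // WAW
      (void)Dep.getLatency();          // register dependences carry a latency
      break;
    case SDep::Order:                  // memory or other ordering
      break;
    }
  }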
A ScheduleDAG for scheduling lists of MachineInstr.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches the scheduling class for an SUnit.
void dumpNode(const SUnit &SU) const override
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void dump() const override
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
MachineFunction & MF
Machine function.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
typename vector_type::const_iterator iterator
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
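SetVector combines set membership tests with a stable, deterministic iteration order, which is why the pipeliner uses it for collections of DAG nodes. A small illustrative sketch, assuming two SUnit pointers A and B are in scope:

  #include "llvm/ADT/SetVector.h"

  // Illustrative only: insertion order is preserved, duplicates are ignored.
  llvm::SetVector<SUnit *> Worklist;
  Worklist.insert(A);                  // returns true, A is new
  Worklist.insert(B);
  Worklist.insert(A);                  // returns false, already present
  if (Worklist.contains(B) && Worklist.count(A) == 1 && !Worklist.empty())
    Worklist.clear();                  // drop all elements at once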
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
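SmallPtrSet and SmallSet share the same flavor of API. A small sketch of the usual membership pattern, assuming an SUnit pointer SU is in scope:

  // Illustrative only: cheap membership tracking for a handful of nodes.
  llvm::SmallPtrSet<SUnit *, 8> Visited;
  if (Visited.insert(SU).second) {     // .second is true on first insertion
    // first time this node is seen
  }
  bool Seen = Visited.count(SU);       // 1 if present, 0 otherwise
  Visited.erase(SU);                   // remove it again
  (void)Seen;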
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
unsigned getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is potentially loop carried.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
int getMOV(SUnit *Node)
The mobility function, which is the number of slots in which an instruction may be scheduled.
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction may be scheduled.
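In swing modulo scheduling the mobility of a node is the size of its scheduling window, the slack between its earliest and latest feasible slots. A sketch of the relation between the accessors above, assuming a SwingSchedulerDAG reference DAG and an SUnit pointer SU; this mirrors the textbook definition rather than quoting the implementation:

  // Illustrative only: mobility is the slack between ALAP and ASAP,
  // i.e. MOV(u) = ALAP(u) - ASAP(u), the number of cycles u can slide.
  int Mobility = DAG.getALAP(SU) - DAG.getASAP(SU);
  // getMOV(SU) is expected to report the same quantity.
  (void)Mobility;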
Object returned by analyzeLoopForPipelining.
virtual bool isMVEExpanderSupported()
Return true if the target can expand pipelined schedule with modulo variable expansion.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS)
Return true if the proposed schedule should be used.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
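Targets opt into pipelining by returning a PipelinerLoopInfo object from analyzeLoopForPipelining; the pipeliner then consults its hooks. A minimal sketch, assuming TII, a loop header block LoopBB, and a MachineInstr MI are in scope:

  // Illustrative only: how a client consults the target's loop hooks.
  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> PLI =
      TII->analyzeLoopForPipelining(LoopBB);
  if (PLI && PLI->shouldIgnoreForPipelining(&MI)) {
    // MI (e.g. the loop-control compare) is kept out of the kernel schedule.
  }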
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
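The memory-related hooks above are what let the dependence-graph builder reason about address arithmetic. A minimal sketch of querying the base operand and byte offset of a memory access, assuming TII, TRI, and a MachineInstr MI are in scope:

  // Illustrative only: recover the base register operand and byte offset.
  const MachineOperand *BaseOp = nullptr;
  int64_t Offset = 0;
  bool OffsetIsScalable = false;
  if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI) &&
      !OffsetIsScalable) {
    // BaseOp/Offset now describe the address; scalable offsets are skipped.
  }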
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static Type * getVoidTy(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
The main class in the implementation of the target independent window scheduler.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
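BuildMI is the usual way to materialize new MachineInstrs, for example when an expander emits copies between stages. A minimal, hypothetical sketch using the overload listed above; MF, DL, TII, DstReg, and SrcReg are assumed to be in scope:

  // Illustrative only: build a free-standing COPY; this overload does not
  // insert the instruction into any basic block.
  MachineInstr *Copy =
      BuildMI(MF, MIMetadata(DL), TII->get(TargetOpcode::COPY))
          .addReg(DstReg, RegState::Define)
          .addReg(SrcReg)
          .getInstr();
  (void)Copy;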
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
@ WS_Force
Use window algorithm instead of SMS algorithm.
@ WS_On
Use window algorithm after SMS algorithm fails.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
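Many of the free functions above come from llvm/ADT/STLExtras.h and simply wrap the standard algorithms over whole ranges. A small illustrative sketch over a hypothetical vector of SUnit pointers:

  // Illustrative only: range-based helpers instead of begin()/end() pairs.
  std::vector<SUnit *> Nodes;                   // assume populated elsewhere
  bool AnyBoundary =
      llvm::any_of(Nodes, [](SUnit *SU) { return SU->isBoundaryNode(); });
  for (SUnit *SU : llvm::reverse(Nodes))        // iterate back to front
    (void)SU;
  for (SUnit *SU : llvm::drop_begin(Nodes))     // skip the first element
    (void)SU;
  (void)AnyBoundary;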
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
cl::opt< bool > SwpEnableCopyToPhi
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.