73#include "llvm/Config/llvm-config.h"
103#define DEBUG_TYPE "pipeliner"
// Pass-wide statistics counters (LLVM STATISTIC macro; reported under
// -stats). They count pipelining attempts/successes and the distinct
// reasons the pipeliner aborts on a loop.
// NOTE(review): the numeric prefixes fused onto these lines (105, 106, ...)
// appear to be original-file line numbers left over from a corrupted
// extraction of this file — confirm against the upstream source before
// relying on this text compiling.
105STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
106STATISTIC(NumPipelined,
"Number of loops software pipelined");
107STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
108STATISTIC(NumFailBranch,
"Pipeliner abort due to unknown branch");
109STATISTIC(NumFailLoop,
"Pipeliner abort due to unsupported loop");
110STATISTIC(NumFailPreheader,
"Pipeliner abort due to missing preheader");
111STATISTIC(NumFailLargeMaxMII,
"Pipeliner abort due to MaxMII too large");
112STATISTIC(NumFailZeroMII,
"Pipeliner abort due to zero MII");
113STATISTIC(NumFailNoSchedule,
"Pipeliner abort due to no schedule found");
114STATISTIC(NumFailZeroStage,
"Pipeliner abort due to zero stage");
115STATISTIC(NumFailLargeMaxStage,
"Pipeliner abort due to too many stages");
119 cl::desc(
"Enable Software Pipelining"));
128 cl::desc(
"Size limit for the MII."),
134 cl::desc(
"Force pipeliner to use specified II."),
140 cl::desc(
"Maximum stages allowed in the generated scheduled."),
147 cl::desc(
"Prune dependences between unrelated Phi nodes."),
154 cl::desc(
"Prune loop carried order dependences."),
172 cl::desc(
"Instead of emitting the pipelined code, annotate instructions "
173 "with the generated schedule for feeding into the "
174 "-modulo-schedule-test pass"));
179 "Use the experimental peeling code generator for software pipelining"));
187 cl::desc(
"Limit register pressure of scheduled loop"));
192 cl::desc(
"Margin representing the unused percentage of "
193 "the register pressure limit"));
200 cl::desc(
"Enable CopyToPhi DAG Mutation"));
205 "pipeliner-force-issue-width",
// Static member definition: upper bound used during circuit enumeration —
// the recursive circuit() routine visible later in this file bails out of a
// node's expansion once its path count exceeds MaxPaths (it compares
// "NumPaths > MaxPaths"), keeping the elementary-circuit search tractable.
// NOTE(review): the leading "211" looks like a fused original line number
// from a corrupted extraction — verify against the upstream file.
211unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
219 "Modulo Software Pipelining",
false,
false)
229 if (skipFunction(mf.getFunction()))
235 if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
239 if (!mf.getSubtarget().enableMachinePipeliner())
244 if (mf.getSubtarget().useDFAforSMS() &&
245 (!mf.getSubtarget().getInstrItineraryData() ||
246 mf.getSubtarget().getInstrItineraryData()->isEmpty()))
250 MLI = &getAnalysis<MachineLoopInfo>();
251 MDT = &getAnalysis<MachineDominatorTree>();
252 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
253 TII = MF->getSubtarget().getInstrInfo();
254 RegClassInfo.runOnMachineFunction(*MF);
256 for (
const auto &L : *MLI)
266bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
267 bool Changed =
false;
268 for (
const auto &InnerLoop : L)
269 Changed |= scheduleLoop(*InnerLoop);
281 setPragmaPipelineOptions(L);
282 if (!canPipelineLoop(L)) {
286 L.getStartLoc(), L.getHeader())
287 <<
"Failed to pipeline loop";
296 Changed = swingModuloScheduler(L);
302void MachinePipeliner::setPragmaPipelineOptions(
MachineLoop &L) {
321 if (LoopID ==
nullptr)
328 MDNode *MD = dyn_cast<MDNode>(MDO);
338 if (S->
getString() ==
"llvm.loop.pipeline.initiationinterval") {
340 "Pipeline initiation interval hint metadata should have two operands.");
342 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
344 }
else if (S->
getString() ==
"llvm.loop.pipeline.disable") {
353bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
354 if (
L.getNumBlocks() != 1) {
357 L.getStartLoc(),
L.getHeader())
358 <<
"Not a single basic block: "
359 <<
ore::NV(
"NumBlocks",
L.getNumBlocks());
367 L.getStartLoc(),
L.getHeader())
368 <<
"Disabled by Pragma.";
379 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeBranch, can NOT pipeline Loop\n");
383 L.getStartLoc(),
L.getHeader())
384 <<
"The branch can't be understood";
393 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeLoop, can NOT pipeline Loop\n");
397 L.getStartLoc(),
L.getHeader())
398 <<
"The loop structure is not supported";
403 if (!
L.getLoopPreheader()) {
404 LLVM_DEBUG(
dbgs() <<
"Preheader not found, can NOT pipeline Loop\n");
408 L.getStartLoc(),
L.getHeader())
409 <<
"No loop preheader found";
415 preprocessPhiNodes(*
L.getHeader());
421 SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
426 auto *RC =
MRI.getRegClass(DefOp.
getReg());
428 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
453bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
454 assert(
L.getBlocks().size() == 1 &&
"SMS works on single blocks only.");
477 return SMS.hasNewSchedule();
490void SwingSchedulerDAG::setMII(
unsigned ResMII,
unsigned RecMII) {
493 else if (II_setByPragma > 0)
494 MII = II_setByPragma;
496 MII = std::max(ResMII, RecMII);
499void SwingSchedulerDAG::setMAX_II() {
502 else if (II_setByPragma > 0)
503 MAX_II = II_setByPragma;
513 addLoopCarriedDependences(AA);
514 updatePhiDependences();
521 findCircuits(NodeSets);
525 unsigned ResMII = calculateResMII();
526 unsigned RecMII = calculateRecMII(NodeSets);
534 setMII(ResMII, RecMII);
538 <<
" (rec=" << RecMII <<
", res=" << ResMII <<
")\n");
544 Pass.ORE->emit([&]() {
547 <<
"Invalid Minimal Initiation Interval: 0";
555 <<
", we don't pipeline large loops\n");
556 NumFailLargeMaxMII++;
557 Pass.ORE->emit([&]() {
560 <<
"Minimal Initiation Interval too large: "
561 <<
ore::NV(
"MII", (
int)MII) <<
" > "
563 <<
"Refer to -pipeliner-max-mii.";
568 computeNodeFunctions(NodeSets);
570 registerPressureFilter(NodeSets);
572 colocateNodeSets(NodeSets);
574 checkNodeSets(NodeSets);
577 for (
auto &
I : NodeSets) {
578 dbgs() <<
" Rec NodeSet ";
585 groupRemainingNodes(NodeSets);
587 removeDuplicateNodes(NodeSets);
590 for (
auto &
I : NodeSets) {
591 dbgs() <<
" NodeSet ";
596 computeNodeOrder(NodeSets);
599 checkValidNodeOrder(Circuits);
602 Scheduled = schedulePipeline(Schedule);
607 Pass.ORE->emit([&]() {
610 <<
"Unable to find schedule";
617 if (numStages == 0) {
620 Pass.ORE->emit([&]() {
623 <<
"No need to pipeline - no overlapped iterations in schedule.";
630 <<
" : too many stages, abort\n");
631 NumFailLargeMaxStage++;
632 Pass.ORE->emit([&]() {
635 <<
"Too many stages in schedule: "
636 <<
ore::NV(
"numStages", (
int)numStages) <<
" > "
638 <<
". Refer to -pipeliner-max-stages.";
643 Pass.ORE->emit([&]() {
646 <<
"Pipelined succesfully!";
651 std::vector<MachineInstr *> OrderedInsts;
655 OrderedInsts.push_back(SU->getInstr());
656 Cycles[SU->getInstr()] =
Cycle;
661 for (
auto &KV : NewMIs) {
662 Cycles[KV.first] = Cycles[KV.second];
663 Stages[KV.first] = Stages[KV.second];
664 NewInstrChanges[KV.first] = InstrChanges[
getSUnit(KV.first)];
671 "Cannot serialize a schedule with InstrChanges!");
690 for (
auto &KV : NewMIs)
701 unsigned &InitVal,
unsigned &LoopVal) {
702 assert(Phi.isPHI() &&
"Expecting a Phi.");
706 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
707 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
708 InitVal = Phi.getOperand(i).getReg();
710 LoopVal = Phi.getOperand(i).getReg();
712 assert(InitVal != 0 && LoopVal != 0 &&
"Unexpected Phi structure.");
718 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
719 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
720 return Phi.getOperand(i).getReg();
729 while (!Worklist.
empty()) {
731 for (
const auto &SI : SU->
Succs) {
732 SUnit *SuccSU = SI.getSUnit();
734 if (Visited.
count(SuccSU))
749 return MI.isCall() ||
MI.mayRaiseFPException() ||
750 MI.hasUnmodeledSideEffects() ||
751 (
MI.hasOrderedMemoryRef() &&
752 (!
MI.mayLoad() || !
MI.isDereferenceableInvariantLoad()));
760 if (!
MI->hasOneMemOperand())
766 for (
const Value *V : Objs) {
778void SwingSchedulerDAG::addLoopCarriedDependences(
AliasAnalysis *AA) {
785 PendingLoads.
clear();
786 else if (
MI.mayLoad()) {
791 for (
const auto *V : Objs) {
795 }
else if (
MI.mayStore()) {
800 for (
const auto *V : Objs) {
802 PendingLoads.
find(V);
803 if (
I == PendingLoads.
end())
805 for (
auto *Load :
I->second) {
813 int64_t Offset1, Offset2;
814 bool Offset1IsScalable, Offset2IsScalable;
816 Offset1IsScalable,
TRI) &&
818 Offset2IsScalable,
TRI)) {
820 Offset1IsScalable == Offset2IsScalable &&
821 (
int)Offset1 < (
int)Offset2) {
823 "What happened to the chain edge?");
874void SwingSchedulerDAG::updatePhiDependences() {
882 unsigned HasPhiUse = 0;
883 unsigned HasPhiDef = 0;
907 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
912 }
else if (MO.isUse()) {
915 if (
DefMI ==
nullptr)
922 ST.adjustSchedDependency(SU, 0, &
I, MO.getOperandNo(), Dep,
929 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
938 for (
auto &PI :
I.Preds) {
941 if (
I.getInstr()->isPHI()) {
950 for (
int i = 0, e = RemoveDeps.
size(); i != e; ++i)
951 I.removePred(RemoveDeps[i]);
957void SwingSchedulerDAG::changeDependences() {
962 unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
963 int64_t NewOffset = 0;
964 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
969 Register OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
989 for (
const SDep &
P :
I.Preds)
990 if (
P.getSUnit() == DefSU)
992 for (
int i = 0, e = Deps.
size(); i != e; i++) {
994 I.removePred(Deps[i]);
998 for (
auto &
P : LastSU->
Preds)
1001 for (
int i = 0, e = Deps.
size(); i != e; i++) {
1014 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
1025 std::vector<MachineInstr *> &OrderedInsts,
1033 Stage <= LastStage; ++Stage) {
1036 Instrs[
Cycle].push_front(SU);
1043 std::deque<SUnit *> &CycleInstrs = Instrs[
Cycle];
1045 for (
SUnit *SU : CycleInstrs) {
1047 OrderedInsts.push_back(
MI);
1057struct FuncUnitSorter {
1063 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1071 unsigned min = UINT_MAX;
1072 if (InstrItins && !InstrItins->
isEmpty()) {
1075 InstrItins->
endStage(SchedClass))) {
1078 if (numAlternatives < min) {
1079 min = numAlternatives;
1096 if (!PRE.ReleaseAtCycle)
1100 unsigned NumUnits = ProcResource->
NumUnits;
1101 if (NumUnits < min) {
1103 F = PRE.ProcResourceIdx;
1108 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1117 unsigned SchedClass =
MI.getDesc().getSchedClass();
1118 if (InstrItins && !InstrItins->
isEmpty()) {
1121 InstrItins->
endStage(SchedClass))) {
1124 Resources[FuncUnits]++;
1139 if (!PRE.ReleaseAtCycle)
1141 Resources[PRE.ProcResourceIdx]++;
1145 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1151 unsigned MFUs1 = minFuncUnits(IS1, F1);
1152 unsigned MFUs2 = minFuncUnits(IS2, F2);
1155 return MFUs1 > MFUs2;
1160class HighRegisterPressureDetector {
1166 const unsigned PSetNum;
1172 std::vector<unsigned> InitSetPressure;
1176 std::vector<unsigned> PressureSetLimit;
1183 using OrderedInstsTy = std::vector<MachineInstr *>;
1187 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1188 if (Pressures.size() == 0) {
1192 for (
unsigned P : Pressures) {
1202 for (
auto PSetIter =
MRI.getPressureSets(
Reg); PSetIter.isValid();
1204 dbgs() << *PSetIter <<
' ';
1209 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1211 auto PSetIter =
MRI.getPressureSets(
Reg);
1212 unsigned Weight = PSetIter.getWeight();
1213 for (; PSetIter.isValid(); ++PSetIter)
1214 Pressure[*PSetIter] += Weight;
1217 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1219 auto PSetIter =
MRI.getPressureSets(
Reg);
1220 unsigned Weight = PSetIter.getWeight();
1221 for (; PSetIter.isValid(); ++PSetIter) {
1222 auto &
P = Pressure[*PSetIter];
1224 "register pressure must be greater than or equal weight");
1231 return Reg.isPhysical() &&
TRI->isFixedRegister(MF,
Reg.asMCReg());
1235 return Reg.isVirtual() &&
MRI.getVRegDef(
Reg)->getParent() == OrigMBB;
1246 void computeLiveIn() {
1248 for (
auto &
MI : *OrigMBB) {
1249 if (
MI.isDebugInstr())
1257 if (isFixedRegister(
Reg))
1259 if (isDefinedInThisLoop(
Reg))
1265 for (
auto LiveIn : Used)
1266 increaseRegisterPressure(InitSetPressure, LiveIn);
1271 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1272 PressureSetLimit[PSet] =
TRI->getRegPressureSetLimit(MF, PSet);
1280 if (isFixedRegister(
Reg))
1285 for (
auto Reg : FixedRegs) {
1287 const int *Sets =
TRI->getRegUnitPressureSets(
Reg);
1288 for (; *Sets != -1; Sets++) {
1289 dbgs() <<
TRI->getRegPressureSetName(*Sets) <<
", ";
1295 for (
auto Reg : FixedRegs) {
1298 auto PSetIter =
MRI.getPressureSets(
Reg);
1299 unsigned Weight = PSetIter.getWeight();
1300 for (; PSetIter.isValid(); ++PSetIter) {
1301 unsigned &Limit = PressureSetLimit[*PSetIter];
1302 assert(Limit >= Weight &&
1303 "register pressure limit must be greater than or equal weight");
1306 <<
" (decreased by " << Weight <<
")\n");
1322 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1323 Instr2StageTy &Stages)
const {
1329 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1330 if (isDefinedInThisLoop(
Reg))
1336 UpdateTargetRegs(
Reg);
1338 for (
auto &
Use : ROMap.
find(
MI)->getSecond().Uses)
1339 UpdateTargetRegs(
Use.RegUnit);
1344 return Stages[
MI] +
MI->isPHI();
1349 for (
auto &
Use : ROMap.
find(
MI)->getSecond().Uses) {
1353 auto Ite = LastUseMI.
find(
Reg);
1354 if (Ite == LastUseMI.
end()) {
1355 LastUseMI[
Reg] =
MI;
1359 if (InstrScore(Orig) < InstrScore(New))
1360 LastUseMI[
Reg] = New;
1365 Instr2LastUsesTy LastUses;
1366 for (
auto &Entry : LastUseMI)
1367 LastUses[Entry.second].insert(Entry.first);
1383 std::vector<unsigned>
1384 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1385 Instr2StageTy &Stages,
1386 const unsigned StageCount)
const {
1393 auto CurSetPressure = InitSetPressure;
1394 auto MaxSetPressure = InitSetPressure;
1395 auto LastUses = computeLastUses(OrderedInsts, Stages);
1398 dbgs() <<
"Ordered instructions:\n";
1400 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1405 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1407 if (!
Reg.isValid() || isFixedRegister(
Reg))
1415 increaseRegisterPressure(CurSetPressure,
Reg);
1419 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1421 if (!
Reg.isValid() || isFixedRegister(
Reg))
1425 if (!RegSet.contains(
Reg))
1430 decreaseRegisterPressure(CurSetPressure,
Reg);
1434 for (
unsigned I = 0;
I < StageCount;
I++) {
1436 const auto Stage = Stages[
MI];
1440 const unsigned Iter =
I - Stage;
1442 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1443 InsertReg(LiveRegSets[Iter],
Def.RegUnit);
1445 for (
auto LastUse : LastUses[
MI]) {
1448 EraseReg(LiveRegSets[Iter - 1], LastUse);
1450 EraseReg(LiveRegSets[Iter], LastUse);
1454 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1455 MaxSetPressure[PSet] =
1456 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1459 dbgs() <<
"CurSetPressure=";
1460 dumpRegisterPressures(CurSetPressure);
1461 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1467 return MaxSetPressure;
1473 : OrigMBB(OrigMBB), MF(MF),
MRI(MF.getRegInfo()),
1474 TRI(MF.getSubtarget().getRegisterInfo()),
1475 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1476 PressureSetLimit(PSetNum, 0) {}
1482 if (
MI.isDebugInstr())
1484 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1488 computePressureSetLimit(RCI);
1494 const unsigned MaxStage)
const {
1496 "the percentage of the margin must be between 0 to 100");
1498 OrderedInstsTy OrderedInsts;
1499 Instr2StageTy Stages;
1501 const auto MaxSetPressure =
1502 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1505 dbgs() <<
"Dump MaxSetPressure:\n";
1506 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1507 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1512 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1513 unsigned Limit = PressureSetLimit[PSet];
1516 <<
" Margin=" << Margin <<
"\n");
1517 if (Limit < MaxSetPressure[PSet] + Margin) {
1520 <<
"Rejected the schedule because of too high register pressure\n");
1536unsigned SwingSchedulerDAG::calculateResMII() {
1539 return RM.calculateResMII();
1548unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1549 unsigned RecMII = 0;
1551 for (
NodeSet &Nodes : NodeSets) {
1555 unsigned Delay = Nodes.getLatency();
1556 unsigned Distance = 1;
1559 unsigned CurMII = (Delay + Distance - 1) / Distance;
1560 Nodes.setRecMII(CurMII);
1561 if (CurMII > RecMII)
1572 for (
SUnit &SU : SUnits) {
1575 DepsAdded.
push_back(std::make_pair(&SU, Pred));
1577 for (std::pair<SUnit *, SDep> &
P : DepsAdded) {
1581 SUnit *TargetSU =
D.getSUnit();
1582 unsigned Reg =
D.getReg();
1583 unsigned Lat =
D.getLatency();
1592void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1596 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1599 for (
auto &SI : SUnits[i].Succs) {
1603 int N =
SI.getSUnit()->NodeNum;
1605 auto Dep = OutputDeps.
find(BackEdge);
1606 if (Dep != OutputDeps.
end()) {
1607 BackEdge = Dep->second;
1608 OutputDeps.
erase(Dep);
1610 OutputDeps[
N] = BackEdge;
1614 if (
SI.getSUnit()->isBoundaryNode() ||
SI.isArtificial() ||
1615 (
SI.getKind() ==
SDep::Anti && !
SI.getSUnit()->getInstr()->isPHI()))
1617 int N =
SI.getSUnit()->NodeNum;
1619 AdjK[i].push_back(
N);
1625 for (
auto &PI : SUnits[i].Preds) {
1626 if (!SUnits[i].getInstr()->mayStore() ||
1629 if (PI.getKind() ==
SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1630 int N = PI.getSUnit()->NodeNum;
1632 AdjK[i].push_back(
N);
1639 for (
auto &OD : OutputDeps)
1640 if (!
Added.test(OD.second)) {
1641 AdjK[OD.first].push_back(OD.second);
1642 Added.set(OD.second);
1648bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
1655 for (
auto W : AdjK[V]) {
1656 if (NumPaths > MaxPaths)
1666 }
else if (!Blocked.test(W)) {
1667 if (circuit(W, S, NodeSets,
1668 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
1676 for (
auto W : AdjK[V]) {
1687void SwingSchedulerDAG::Circuits::unblock(
int U) {
1690 while (!BU.
empty()) {
1692 assert(SI != BU.
end() &&
"Invalid B set.");
1695 if (Blocked.test(
W->NodeNum))
1696 unblock(
W->NodeNum);
1702void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1707 Circuits Cir(
SUnits, Topo);
1709 Cir.createAdjacencyStructure(
this);
1710 for (
int i = 0, e =
SUnits.size(); i != e; ++i) {
1712 Cir.circuit(i, i, NodeSets);
1748 for (
auto &Dep : SU.
Preds) {
1749 SUnit *TmpSU = Dep.getSUnit();
1761 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
1769 for (
auto &Dep : PHISUs[
Index]->Succs) {
1773 SUnit *TmpSU = Dep.getSUnit();
1783 if (UseSUs.
size() == 0)
1788 for (
auto *
I : UseSUs) {
1789 for (
auto *Src : SrcSUs) {
1803 if (
D.isArtificial() ||
D.getSUnit()->isBoundaryNode())
1814void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1815 ScheduleInfo.resize(
SUnits.size());
1818 for (
int I : Topo) {
1826 for (
int I : Topo) {
1828 int zeroLatencyDepth = 0;
1832 if (
P.getLatency() == 0)
1837 asap = std::max(asap, (
int)(
getASAP(
pred) +
P.getLatency() -
1840 maxASAP = std::max(maxASAP, asap);
1841 ScheduleInfo[
I].ASAP = asap;
1842 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
1848 int zeroLatencyHeight = 0;
1863 ScheduleInfo[
I].ALAP = alap;
1864 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
1869 I.computeNodeSetInfo(
this);
1872 for (
unsigned i = 0; i <
SUnits.size(); i++) {
1873 dbgs() <<
"\tNode " << i <<
":\n";
1894 if (S && S->count(Pred.
getSUnit()) == 0)
1905 if (S && S->count(Succ.
getSUnit()) == 0)
1911 return !Preds.
empty();
1923 if (S && S->count(Succ.
getSUnit()) == 0)
1933 if (S && S->count(Pred.
getSUnit()) == 0)
1939 return !Succs.
empty();
1954 if (!Visited.
insert(Cur).second)
1955 return Path.contains(Cur);
1956 bool FoundPath =
false;
1957 for (
auto &SI : Cur->
Succs)
1960 computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1961 for (
auto &PI : Cur->
Preds)
1964 computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1979 for (
SUnit *SU : NS) {
1985 if (Reg.isVirtual())
1987 else if (
MRI.isAllocatable(Reg))
1992 for (
SUnit *SU : NS)
1996 if (Reg.isVirtual()) {
1997 if (!
Uses.count(Reg))
2000 }
else if (
MRI.isAllocatable(Reg)) {
2002 if (!
Uses.count(Unit))
2012void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2013 for (
auto &NS : NodeSets) {
2019 RecRPTracker.init(&
MF, &RegClassInfo, &LIS,
BB,
BB->
end(),
false,
true);
2021 RecRPTracker.closeBottom();
2023 std::vector<SUnit *>
SUnits(NS.begin(), NS.end());
2025 return A->NodeNum >
B->NodeNum;
2028 for (
auto &SU :
SUnits) {
2034 RecRPTracker.setPos(std::next(CurInstI));
2038 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2043 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2046 NS.setExceedPressure(SU);
2049 RecRPTracker.recede();
2056void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2057 unsigned Colocate = 0;
2058 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2063 for (
int j = i + 1;
j <
e; ++
j) {
2084void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2089 for (
auto &NS : NodeSets) {
2090 if (NS.getRecMII() > 2)
2092 if (NS.getMaxDepth() > MII)
2101void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2129 NodesAdded.
insert(
I.begin(),
I.end());
2138 addConnectedNodes(
I, NewSet, NodesAdded);
2139 if (!NewSet.
empty())
2140 NodeSets.push_back(NewSet);
2147 addConnectedNodes(
I, NewSet, NodesAdded);
2148 if (!NewSet.
empty())
2149 NodeSets.push_back(NewSet);
2154 if (NodesAdded.
count(&SU) == 0) {
2156 addConnectedNodes(&SU, NewSet, NodesAdded);
2157 if (!NewSet.
empty())
2158 NodeSets.push_back(NewSet);
2164void SwingSchedulerDAG::addConnectedNodes(
SUnit *SU,
NodeSet &NewSet,
2168 for (
auto &SI : SU->
Succs) {
2170 if (!
SI.isArtificial() && !
Successor->isBoundaryNode() &&
2172 addConnectedNodes(
Successor, NewSet, NodesAdded);
2174 for (
auto &PI : SU->
Preds) {
2175 SUnit *Predecessor = PI.getSUnit();
2176 if (!PI.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2177 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2186 for (
SUnit *SU : Set1) {
2187 if (Set2.
count(SU) != 0)
2190 return !Result.empty();
2194void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2203 for (
SUnit *SU : *J)
2215void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2219 J->remove_if([&](
SUnit *SUJ) {
return I->count(SUJ); });
2234void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2238 for (
auto &Nodes : NodeSets) {
2243 R.insert(
N.begin(),
N.end());
2247 R.insert(
N.begin(),
N.end());
2255 }
else if (NodeSets.size() == 1) {
2256 for (
const auto &
N : Nodes)
2257 if (
N->Succs.size() == 0)
2263 SUnit *maxASAP =
nullptr;
2264 for (
SUnit *SU : Nodes) {
2274 while (!
R.empty()) {
2275 if (Order == TopDown) {
2279 while (!
R.empty()) {
2280 SUnit *maxHeight =
nullptr;
2293 NodeOrder.insert(maxHeight);
2295 R.remove(maxHeight);
2296 for (
const auto &
I : maxHeight->
Succs) {
2297 if (Nodes.count(
I.getSUnit()) == 0)
2299 if (NodeOrder.contains(
I.getSUnit()))
2303 R.insert(
I.getSUnit());
2306 for (
const auto &
I : maxHeight->
Preds) {
2309 if (Nodes.count(
I.getSUnit()) == 0)
2311 if (NodeOrder.contains(
I.getSUnit()))
2313 R.insert(
I.getSUnit());
2319 if (
pred_L(NodeOrder,
N, &Nodes))
2320 R.insert(
N.begin(),
N.end());
2325 while (!
R.empty()) {
2326 SUnit *maxDepth =
nullptr;
2338 NodeOrder.insert(maxDepth);
2341 if (Nodes.isExceedSU(maxDepth)) {
2344 R.insert(Nodes.getNode(0));
2347 for (
const auto &
I : maxDepth->
Preds) {
2348 if (Nodes.count(
I.getSUnit()) == 0)
2350 if (NodeOrder.contains(
I.getSUnit()))
2352 R.insert(
I.getSUnit());
2355 for (
const auto &
I : maxDepth->
Succs) {
2358 if (Nodes.count(
I.getSUnit()) == 0)
2360 if (NodeOrder.contains(
I.getSUnit()))
2362 R.insert(
I.getSUnit());
2368 if (
succ_L(NodeOrder,
N, &Nodes))
2369 R.insert(
N.begin(),
N.end());
2376 dbgs() <<
"Node order: ";
2377 for (
SUnit *
I : NodeOrder)
2378 dbgs() <<
" " <<
I->NodeNum <<
" ";
2385bool SwingSchedulerDAG::schedulePipeline(
SMSchedule &Schedule) {
2387 if (NodeOrder.empty()){
2392 bool scheduleFound =
false;
2393 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2397 HRPDetector->init(RegClassInfo);
2400 for (
unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
2412 int EarlyStart = INT_MIN;
2413 int LateStart = INT_MAX;
2416 int SchedEnd = INT_MAX;
2417 int SchedStart = INT_MIN;
2418 Schedule.
computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2427 dbgs() <<
format(
"\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
2428 LateStart, SchedEnd, SchedStart);
2431 if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2432 SchedStart > LateStart)
2433 scheduleFound =
false;
2434 else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2435 SchedEnd = std::min(SchedEnd, EarlyStart + (
int)II - 1);
2436 scheduleFound = Schedule.
insert(SU, EarlyStart, SchedEnd, II);
2437 }
else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2438 SchedStart = std::max(SchedStart, LateStart - (
int)II + 1);
2439 scheduleFound = Schedule.
insert(SU, LateStart, SchedStart, II);
2440 }
else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2442 std::min(SchedEnd, std::min(LateStart, EarlyStart + (
int)II - 1));
2447 scheduleFound = Schedule.
insert(SU, SchedEnd, EarlyStart, II);
2449 scheduleFound = Schedule.
insert(SU, EarlyStart, SchedEnd, II);
2452 scheduleFound = Schedule.
insert(SU, FirstCycle +
getASAP(SU),
2453 FirstCycle +
getASAP(SU) + II - 1, II);
2460 scheduleFound =
false;
2464 dbgs() <<
"\tCan't schedule\n";
2466 }
while (++NI != NE && scheduleFound);
2488 if (scheduleFound) {
2494 if (scheduleFound) {
2496 Pass.ORE->emit([&]() {
2499 <<
"Schedule found with Initiation Interval: "
2501 <<
", MaxStageCount: "
2512bool SwingSchedulerDAG::computeDelta(
MachineInstr &
MI,
unsigned &Delta) {
2516 bool OffsetIsScalable;
2521 if (OffsetIsScalable)
2524 if (!BaseOp->
isReg())
2532 if (BaseDef && BaseDef->
isPHI()) {
2556 unsigned &OffsetPos,
2562 unsigned BasePosLd, OffsetPosLd;
2565 Register BaseReg =
MI->getOperand(BasePosLd).getReg();
2570 if (!Phi || !
Phi->isPHI())
2579 if (!PrevDef || PrevDef ==
MI)
2585 unsigned BasePos1 = 0, OffsetPos1 = 0;
2591 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
2601 BasePos = BasePosLd;
2602 OffsetPos = OffsetPosLd;
2614 InstrChanges.find(SU);
2615 if (It != InstrChanges.end()) {
2616 std::pair<unsigned, int64_t> RegAndOffset = It->second;
2617 unsigned BasePos, OffsetPos;
2620 Register BaseReg =
MI->getOperand(BasePos).getReg();
2626 if (BaseStageNum < DefStageNum) {
2628 int OffsetDiff = DefStageNum - BaseStageNum;
2629 if (DefCycleNum < BaseCycleNum) {
2635 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2650 while (Def->isPHI()) {
2651 if (!Visited.
insert(Def).second)
2653 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2654 if (Def->getOperand(i + 1).getMBB() ==
BB) {
2681 assert(SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
2695 unsigned DeltaS, DeltaD;
2696 if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2700 int64_t OffsetS, OffsetD;
2701 bool OffsetSIsScalable, OffsetDIsScalable;
2709 assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2710 "Expected offsets to be byte offsets");
2714 if (!DefS || !DefD || !DefS->
isPHI() || !DefD->
isPHI())
2717 unsigned InitValS = 0;
2718 unsigned LoopValS = 0;
2719 unsigned InitValD = 0;
2720 unsigned LoopValD = 0;
2744 if (DeltaS != DeltaD || DeltaS < AccessSizeS.
getValue() ||
2748 return (OffsetS + (int64_t)AccessSizeS.
getValue() <
2749 OffsetD + (int64_t)AccessSizeD.
getValue());
2752void SwingSchedulerDAG::postProcessDAG() {
2753 for (
auto &M : Mutations)
2763 bool forward =
true;
2765 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
2766 << EndCycle <<
" II: " << II <<
"\n";
2768 if (StartCycle > EndCycle)
2772 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2773 for (
int curCycle = StartCycle; curCycle != termCycle;
2774 forward ? ++curCycle : --curCycle) {
2779 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
2784 ProcItinResources.reserveResources(*SU, curCycle);
2785 ScheduledInstrs[curCycle].push_back(SU);
2786 InstrToCycle.insert(std::make_pair(SU, curCycle));
2787 if (curCycle > LastCycle)
2788 LastCycle = curCycle;
2789 if (curCycle < FirstCycle)
2790 FirstCycle = curCycle;
2794 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
2806 int EarlyCycle = INT_MAX;
2807 while (!Worklist.
empty()) {
2810 if (Visited.
count(PrevSU))
2812 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2813 if (it == InstrToCycle.end())
2815 EarlyCycle = std::min(EarlyCycle, it->second);
2816 for (
const auto &PI : PrevSU->
Preds)
2829 int LateCycle = INT_MIN;
2830 while (!Worklist.
empty()) {
2835 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2836 if (it == InstrToCycle.end())
2838 LateCycle = std::max(LateCycle, it->second);
2839 for (
const auto &SI : SuccSU->
Succs)
2851 for (
auto &
P : SU->
Preds)
2852 if (DAG->
isBackedge(SU,
P) &&
P.getSUnit()->getInstr()->isPHI())
2853 for (
auto &S :
P.getSUnit()->Succs)
2855 return P.getSUnit();
2862 int *MinEnd,
int *MaxStart,
int II,
2867 for (
int cycle =
getFirstCycle(); cycle <= LastCycle; ++cycle) {
2873 for (
unsigned i = 0, e = (
unsigned)SU->
Preds.size(); i != e; ++i) {
2879 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2882 *MinEnd = std::min(*MinEnd,
End);
2887 *MinLateStart = std::min(*MinLateStart, LateStart);
2895 *MinLateStart = std::min(*MinLateStart, cycle);
2897 for (
unsigned i = 0, e = (
unsigned)SU->
Succs.size(); i != e; ++i) {
2898 if (SU->
Succs[i].getSUnit() ==
I) {
2903 *MinLateStart = std::min(*MinLateStart, LateStart);
2906 *MaxStart = std::max(*MaxStart, Start);
2911 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2923 std::deque<SUnit *> &Insts)
const {
2925 bool OrderBeforeUse =
false;
2926 bool OrderAfterDef =
false;
2927 bool OrderBeforeDef =
false;
2928 unsigned MoveDef = 0;
2929 unsigned MoveUse = 0;
2933 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
2936 if (!MO.isReg() || !MO.getReg().isVirtual())
2940 unsigned BasePos, OffsetPos;
2941 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
2942 if (
MI->getOperand(BasePos).getReg() == Reg)
2946 std::tie(Reads,
Writes) =
2947 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2949 OrderBeforeUse =
true;
2954 OrderAfterDef =
true;
2958 OrderBeforeUse =
true;
2962 OrderAfterDef =
true;
2966 OrderBeforeUse =
true;
2970 OrderAfterDef =
true;
2975 OrderBeforeUse =
true;
2981 OrderBeforeDef =
true;
2988 for (
auto &S : SU->
Succs) {
2992 OrderBeforeUse =
true;
3000 OrderBeforeUse =
true;
3001 if ((MoveUse == 0) || (Pos < MoveUse))
3005 for (
auto &
P : SU->
Preds) {
3006 if (
P.getSUnit() != *
I)
3009 OrderAfterDef =
true;
3016 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3017 OrderBeforeUse =
false;
3022 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3026 if (OrderBeforeUse && OrderAfterDef) {
3027 SUnit *UseSU = Insts.at(MoveUse);
3028 SUnit *DefSU = Insts.at(MoveDef);
3029 if (MoveUse > MoveDef) {
3030 Insts.erase(Insts.begin() + MoveUse);
3031 Insts.erase(Insts.begin() + MoveDef);
3033 Insts.erase(Insts.begin() + MoveDef);
3034 Insts.erase(Insts.begin() + MoveUse);
3044 Insts.push_front(SU);
3046 Insts.push_back(SU);
3054 assert(Phi.isPHI() &&
"Expecting a Phi.");
3059 unsigned InitVal = 0;
3060 unsigned LoopVal = 0;
3061 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3069 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3087 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3093 if (DMO.getReg() == LoopReg)
3105 for (
auto &SU : SSD->
SUnits)
3109 while (!Worklist.
empty()) {
3111 if (DoNotPipeline.
count(SU))
3114 DoNotPipeline.
insert(SU);
3115 for (
auto &Dep : SU->
Preds)
3118 for (
auto &Dep : SU->
Succs)
3122 return DoNotPipeline;
3131 int NewLastCycle = INT_MIN;
3136 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3142 for (
auto &Dep : SU.
Preds)
3143 NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
3145 int OldCycle = InstrToCycle[&SU];
3146 if (OldCycle != NewCycle) {
3147 InstrToCycle[&SU] = NewCycle;
3152 <<
") is not pipelined; moving from cycle " << OldCycle
3153 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3155 NewLastCycle = std::max(NewLastCycle, NewCycle);
3157 LastCycle = NewLastCycle;
3174 int CycleDef = InstrToCycle[&SU];
3175 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3176 for (
auto &SI : SU.
Succs)
3177 if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
3181 if (InstrToCycle[SI.getSUnit()] <= CycleDef)
3198void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3201 typedef std::pair<SUnit *, unsigned> UnitIndex;
3202 std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(
nullptr, 0));
3204 for (
unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3205 Indices.push_back(std::make_pair(NodeOrder[i], i));
3207 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3208 return std::get<0>(i1) < std::get<0>(i2);
3221 for (
unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3222 SUnit *SU = NodeOrder[i];
3225 bool PredBefore =
false;
3226 bool SuccBefore =
false;
3235 unsigned PredIndex = std::get<1>(
3251 unsigned SuccIndex = std::get<1>(
3264 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3269 NumNodeOrderIssues++;
3273 <<
" are scheduled before node " << SU->
NodeNum
3280 dbgs() <<
"Invalid node order found!\n";
3291 unsigned OverlapReg = 0;
3292 unsigned NewBaseReg = 0;
3293 for (
SUnit *SU : Instrs) {
3295 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3303 InstrChanges.find(SU);
3304 if (It != InstrChanges.end()) {
3305 unsigned BasePos, OffsetPos;
3311 MI->getOperand(OffsetPos).getImm() - It->second.second;
3324 unsigned TiedUseIdx = 0;
3325 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3327 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3329 NewBaseReg =
MI->getOperand(i).getReg();
3338 const std::deque<SUnit *> &Instrs)
const {
3339 std::deque<SUnit *> NewOrderPhi;
3340 for (
SUnit *SU : Instrs) {
3342 NewOrderPhi.push_back(SU);
3344 std::deque<SUnit *> NewOrderI;
3345 for (
SUnit *SU : Instrs) {
3361 std::deque<SUnit *> &cycleInstrs =
3362 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3364 ScheduledInstrs[cycle].push_front(SU);
3370 for (
int cycle =
getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3371 ScheduledInstrs.erase(cycle);
3381 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3390 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3391 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3392 for (
const auto &
I : Nodes)
3393 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3397#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3404 for (
SUnit *CI : cycleInstrs->second) {
3406 os <<
"(" << CI->
NodeNum <<
") ";
3417void ResourceManager::dumpMRT()
const {
3421 std::stringstream SS;
3423 SS << std::setw(4) <<
"Slot";
3425 SS << std::setw(3) <<
I;
3426 SS << std::setw(7) <<
"#Mops"
3428 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3429 SS << std::setw(4) << Slot;
3431 SS << std::setw(3) << MRT[Slot][
I];
3432 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3441 unsigned ProcResourceID = 0;
3446 "Too many kinds of resources, unsupported");
3452 if (
Desc.SubUnitsIdxBegin)
3454 Masks[
I] = 1ULL << ProcResourceID;
3460 if (!
Desc.SubUnitsIdxBegin)
3462 Masks[
I] = 1ULL << ProcResourceID;
3463 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3464 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3469 dbgs() <<
"ProcResourceDesc:\n";
3472 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3473 ProcResource->
Name,
I, Masks[
I],
3476 dbgs() <<
" -----------------\n";
3484 dbgs() <<
"canReserveResources:\n";
3487 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3493 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3499 reserveResources(SCDesc,
Cycle);
3500 bool Result = !isOverbooked();
3501 unreserveResources(SCDesc,
Cycle);
3510 dbgs() <<
"reserveResources:\n";
3513 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3519 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3525 reserveResources(SCDesc,
Cycle);
3530 dbgs() <<
"reserveResources: done!\n\n";
3541 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3544 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3553 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3556 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3559bool ResourceManager::isOverbooked()
const {
3561 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
3564 if (MRT[Slot][
I] >
Desc->NumUnits)
3567 if (NumScheduledMops[Slot] > IssueWidth)
3573int ResourceManager::calculateResMIIDFA()
const {
3578 FuncUnitSorter FUS = FuncUnitSorter(*ST);
3580 FUS.calcCriticalResources(*SU.
getInstr());
3591 while (!FuncUnitOrder.empty()) {
3593 FuncUnitOrder.pop();
3600 unsigned ReservedCycles = 0;
3601 auto *RI = Resources.
begin();
3602 auto *RE = Resources.
end();
3604 dbgs() <<
"Trying to reserve resource for " << NumCycles
3605 <<
" cycles for \n";
3608 for (
unsigned C = 0;
C < NumCycles; ++
C)
3610 if ((*RI)->canReserveResources(*
MI)) {
3611 (*RI)->reserveResources(*
MI);
3618 <<
", NumCycles:" << NumCycles <<
"\n");
3620 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
3622 <<
"NewResource created to reserve resources"
3625 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
3626 NewResource->reserveResources(*
MI);
3627 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
3631 int Resmii = Resources.
size();
3638 return calculateResMIIDFA();
3657 <<
" WriteProcRes: ";
3668 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
3671 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
3676 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
3679 dbgs() <<
"#Mops: " << NumMops <<
", "
3680 <<
"IssueWidth: " << IssueWidth <<
", "
3681 <<
"Cycles: " << Result <<
"\n";
3686 std::stringstream SS;
3687 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
3688 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
3695 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
3698 std::stringstream SS;
3699 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
3700 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
3701 << std::setw(10) << Cycles <<
"\n";
3705 if (Cycles > Result)
3712 InitiationInterval = II;
3713 DFAResources.clear();
3714 DFAResources.resize(II);
3715 for (
auto &
I : DFAResources)
3716 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
3719 NumScheduledMops.
clear();
3720 NumScheduledMops.
resize(II);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file declares an analysis pass that computes CycleInfo for LLVM IR, specialized from GenericCycl...
This file defines the DenseMap class.
SmallVector< uint32_t, 0 > Writes
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Modulo Software Pipelining
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static bool isDependenceBarrier(MachineInstr &MI)
Return true if the instruction causes a chain between memory references before and after it.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static unsigned getSize(unsigned Kind)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
Implements a dense probed hash-table based set.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
A possibly irreducible generalization of a Loop.
Itinerary data supplied by a subtarget to be used by a target.
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
BlockT * getHeader() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Generic base class for all target subtargets.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
const MachineOperand & getOperand(unsigned i) const
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
static use_instr_iterator use_instr_end()
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
Pass interface - Implemented by all 'passes'.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Track the current register pressure at some position in the instruction stream, and remember the high...
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Wrapper class representing virtual and physical registers.
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Kind
These are the different kinds of scheduling dependencies.
@ Output
A register output-dependence (aka WAW).
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void dumpNode(const SUnit &SU) const override
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void dump() const override
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
MachineFunction & MF
Machine function.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
typename vector_type::const_iterator iterator
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
unsigned getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is potentially loop carried.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
int getMOV(SUnit *Node)
The mobility function, which is the number of slots in which an instruction may be scheduled.
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction may be scheduled.
Object returned by analyzeLoopForPipelining.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS)
Return true if the proposed schedule should be used.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static Type * getVoidTy(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
cl::opt< bool > SwpEnableCopyToPhi
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Description of the encoding of one expression Op.
These values represent a non-pipelined step in the execution of an instruction.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
static constexpr LaneBitmask getNone()
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
unsigned getNumProcResourceKinds() const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineInstr * LoopInductionVar
SmallVector< MachineOperand, 4 > BrCond
MachineInstr * LoopCompare
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo
Store the effects of a change in pressure on things that MI scheduler cares about.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.