74#include "llvm/Config/llvm-config.h"
104#define DEBUG_TYPE "pipeliner"
106STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
107STATISTIC(NumPipelined,
"Number of loops software pipelined");
108STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
109STATISTIC(NumFailBranch,
"Pipeliner abort due to unknown branch");
110STATISTIC(NumFailLoop,
"Pipeliner abort due to unsupported loop");
111STATISTIC(NumFailPreheader,
"Pipeliner abort due to missing preheader");
112STATISTIC(NumFailLargeMaxMII,
"Pipeliner abort due to MaxMII too large");
113STATISTIC(NumFailZeroMII,
"Pipeliner abort due to zero MII");
114STATISTIC(NumFailNoSchedule,
"Pipeliner abort due to no schedule found");
115STATISTIC(NumFailZeroStage,
"Pipeliner abort due to zero stage");
116STATISTIC(NumFailLargeMaxStage,
"Pipeliner abort due to too many stages");
120 cl::desc(
"Enable Software Pipelining"));
129 cl::desc(
"Size limit for the MII."),
135 cl::desc(
"Force pipeliner to use specified II."),
141 cl::desc(
"Maximum stages allowed in the generated scheduled."),
148 cl::desc(
"Prune dependences between unrelated Phi nodes."),
155 cl::desc(
"Prune loop carried order dependences."),
173 cl::desc(
"Instead of emitting the pipelined code, annotate instructions "
174 "with the generated schedule for feeding into the "
175 "-modulo-schedule-test pass"));
180 "Use the experimental peeling code generator for software pipelining"));
188 cl::desc(
"Limit register pressure of scheduled loop"));
193 cl::desc(
"Margin representing the unused percentage of "
194 "the register pressure limit"));
198 cl::desc(
"Use the MVE code generator for software pipelining"));
205 cl::desc(
"Enable CopyToPhi DAG Mutation"));
210 "pipeliner-force-issue-width",
217 cl::desc(
"Set how to use window scheduling algorithm."),
219 "Turn off window algorithm."),
221 "Use window algorithm after SMS algorithm fails."),
223 "Use window algorithm instead of SMS algorithm.")));
227unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
235 "Modulo Software Pipelining",
false,
false)
245 if (skipFunction(mf.getFunction()))
251 if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
255 if (!mf.getSubtarget().enableMachinePipeliner())
260 if (mf.getSubtarget().useDFAforSMS() &&
261 (!mf.getSubtarget().getInstrItineraryData() ||
262 mf.getSubtarget().getInstrItineraryData()->isEmpty()))
266 MLI = &getAnalysis<MachineLoopInfo>();
267 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
268 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
269 TII = MF->getSubtarget().getInstrInfo();
270 RegClassInfo.runOnMachineFunction(*MF);
272 for (
const auto &L : *MLI)
282bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
283 bool Changed =
false;
284 for (
const auto &InnerLoop : L)
285 Changed |= scheduleLoop(*InnerLoop);
297 setPragmaPipelineOptions(L);
298 if (!canPipelineLoop(L)) {
302 L.getStartLoc(), L.getHeader())
303 <<
"Failed to pipeline loop";
311 if (useSwingModuloScheduler())
312 Changed = swingModuloScheduler(L);
314 if (useWindowScheduler(Changed))
315 Changed = runWindowScheduler(L);
321void MachinePipeliner::setPragmaPipelineOptions(
MachineLoop &L) {
340 if (LoopID ==
nullptr)
347 MDNode *MD = dyn_cast<MDNode>(MDO);
357 if (S->
getString() ==
"llvm.loop.pipeline.initiationinterval") {
359 "Pipeline initiation interval hint metadata should have two operands.");
361 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
363 }
else if (S->
getString() ==
"llvm.loop.pipeline.disable") {
372bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
373 if (
L.getNumBlocks() != 1) {
376 L.getStartLoc(),
L.getHeader())
377 <<
"Not a single basic block: "
378 <<
ore::NV(
"NumBlocks",
L.getNumBlocks());
386 L.getStartLoc(),
L.getHeader())
387 <<
"Disabled by Pragma.";
398 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeBranch, can NOT pipeline Loop\n");
402 L.getStartLoc(),
L.getHeader())
403 <<
"The branch can't be understood";
412 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeLoop, can NOT pipeline Loop\n");
416 L.getStartLoc(),
L.getHeader())
417 <<
"The loop structure is not supported";
422 if (!
L.getLoopPreheader()) {
423 LLVM_DEBUG(
dbgs() <<
"Preheader not found, can NOT pipeline Loop\n");
427 L.getStartLoc(),
L.getHeader())
428 <<
"No loop preheader found";
434 preprocessPhiNodes(*
L.getHeader());
440 SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
445 auto *RC =
MRI.getRegClass(DefOp.
getReg());
447 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
472bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
473 assert(
L.getBlocks().size() == 1 &&
"SMS works on single blocks only.");
496 return SMS.hasNewSchedule();
510bool MachinePipeliner::runWindowScheduler(
MachineLoop &L) {
515 Context.
PassConfig = &getAnalysis<TargetPassConfig>();
516 Context.
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
517 Context.
LIS = &getAnalysis<LiveIntervals>();
523bool MachinePipeliner::useSwingModuloScheduler() {
528bool MachinePipeliner::useWindowScheduler(
bool Changed) {
535void SwingSchedulerDAG::setMII(
unsigned ResMII,
unsigned RecMII) {
538 else if (II_setByPragma > 0)
539 MII = II_setByPragma;
541 MII = std::max(ResMII, RecMII);
544void SwingSchedulerDAG::setMAX_II() {
547 else if (II_setByPragma > 0)
548 MAX_II = II_setByPragma;
558 addLoopCarriedDependences(AA);
559 updatePhiDependences();
566 findCircuits(NodeSets);
570 unsigned ResMII = calculateResMII();
571 unsigned RecMII = calculateRecMII(NodeSets);
579 setMII(ResMII, RecMII);
583 <<
" (rec=" << RecMII <<
", res=" << ResMII <<
")\n");
589 Pass.ORE->emit([&]() {
592 <<
"Invalid Minimal Initiation Interval: 0";
600 <<
", we don't pipeline large loops\n");
601 NumFailLargeMaxMII++;
602 Pass.ORE->emit([&]() {
605 <<
"Minimal Initiation Interval too large: "
606 <<
ore::NV(
"MII", (
int)MII) <<
" > "
608 <<
"Refer to -pipeliner-max-mii.";
613 computeNodeFunctions(NodeSets);
615 registerPressureFilter(NodeSets);
617 colocateNodeSets(NodeSets);
619 checkNodeSets(NodeSets);
622 for (
auto &
I : NodeSets) {
623 dbgs() <<
" Rec NodeSet ";
630 groupRemainingNodes(NodeSets);
632 removeDuplicateNodes(NodeSets);
635 for (
auto &
I : NodeSets) {
636 dbgs() <<
" NodeSet ";
641 computeNodeOrder(NodeSets);
644 checkValidNodeOrder(Circuits);
647 Scheduled = schedulePipeline(Schedule);
652 Pass.ORE->emit([&]() {
655 <<
"Unable to find schedule";
662 if (numStages == 0) {
665 Pass.ORE->emit([&]() {
668 <<
"No need to pipeline - no overlapped iterations in schedule.";
675 <<
" : too many stages, abort\n");
676 NumFailLargeMaxStage++;
677 Pass.ORE->emit([&]() {
680 <<
"Too many stages in schedule: "
681 <<
ore::NV(
"numStages", (
int)numStages) <<
" > "
683 <<
". Refer to -pipeliner-max-stages.";
688 Pass.ORE->emit([&]() {
691 <<
"Pipelined succesfully!";
696 std::vector<MachineInstr *> OrderedInsts;
700 OrderedInsts.push_back(SU->getInstr());
701 Cycles[SU->getInstr()] =
Cycle;
706 for (
auto &KV : NewMIs) {
707 Cycles[KV.first] = Cycles[KV.second];
708 Stages[KV.first] = Stages[KV.second];
709 NewInstrChanges[KV.first] = InstrChanges[
getSUnit(KV.first)];
716 "Cannot serialize a schedule with InstrChanges!");
740 for (
auto &KV : NewMIs)
751 unsigned &InitVal,
unsigned &LoopVal) {
752 assert(Phi.isPHI() &&
"Expecting a Phi.");
756 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
757 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
758 InitVal = Phi.getOperand(i).getReg();
760 LoopVal = Phi.getOperand(i).getReg();
762 assert(InitVal != 0 && LoopVal != 0 &&
"Unexpected Phi structure.");
768 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
769 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
770 return Phi.getOperand(i).getReg();
779 while (!Worklist.
empty()) {
781 for (
const auto &SI : SU->
Succs) {
782 SUnit *SuccSU = SI.getSUnit();
784 if (Visited.
count(SuccSU))
799 return MI.isCall() ||
MI.mayRaiseFPException() ||
800 MI.hasUnmodeledSideEffects() ||
801 (
MI.hasOrderedMemoryRef() &&
802 (!
MI.mayLoad() || !
MI.isDereferenceableInvariantLoad()));
810 if (!
MI->hasOneMemOperand())
816 for (
const Value *V : Objs) {
828void SwingSchedulerDAG::addLoopCarriedDependences(
AliasAnalysis *AA) {
835 PendingLoads.
clear();
836 else if (
MI.mayLoad()) {
841 for (
const auto *V : Objs) {
845 }
else if (
MI.mayStore()) {
850 for (
const auto *V : Objs) {
852 PendingLoads.
find(V);
853 if (
I == PendingLoads.
end())
855 for (
auto *Load :
I->second) {
863 int64_t Offset1, Offset2;
864 bool Offset1IsScalable, Offset2IsScalable;
866 Offset1IsScalable,
TRI) &&
868 Offset2IsScalable,
TRI)) {
870 Offset1IsScalable == Offset2IsScalable &&
871 (
int)Offset1 < (
int)Offset2) {
873 "What happened to the chain edge?");
924void SwingSchedulerDAG::updatePhiDependences() {
932 unsigned HasPhiUse = 0;
933 unsigned HasPhiDef = 0;
957 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
962 }
else if (MO.isUse()) {
965 if (
DefMI ==
nullptr)
972 ST.adjustSchedDependency(SU, 0, &
I, MO.getOperandNo(), Dep,
979 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
988 for (
auto &PI :
I.Preds) {
991 if (
I.getInstr()->isPHI()) {
1000 for (
int i = 0, e = RemoveDeps.
size(); i != e; ++i)
1001 I.removePred(RemoveDeps[i]);
1007void SwingSchedulerDAG::changeDependences() {
1012 unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
1013 int64_t NewOffset = 0;
1014 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
1019 Register OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
1039 for (
const SDep &
P :
I.Preds)
1040 if (
P.getSUnit() == DefSU)
1042 for (
int i = 0, e = Deps.
size(); i != e; i++) {
1044 I.removePred(Deps[i]);
1048 for (
auto &
P : LastSU->
Preds)
1051 for (
int i = 0, e = Deps.
size(); i != e; i++) {
1064 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
1075 std::vector<MachineInstr *> &OrderedInsts,
1083 Stage <= LastStage; ++Stage) {
1086 Instrs[
Cycle].push_front(SU);
1093 std::deque<SUnit *> &CycleInstrs = Instrs[
Cycle];
1095 for (
SUnit *SU : CycleInstrs) {
1097 OrderedInsts.push_back(
MI);
1107struct FuncUnitSorter {
1113 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1121 unsigned min = UINT_MAX;
1122 if (InstrItins && !InstrItins->
isEmpty()) {
1125 InstrItins->
endStage(SchedClass))) {
1128 if (numAlternatives < min) {
1129 min = numAlternatives;
1146 if (!PRE.ReleaseAtCycle)
1150 unsigned NumUnits = ProcResource->
NumUnits;
1151 if (NumUnits < min) {
1153 F = PRE.ProcResourceIdx;
1158 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1167 unsigned SchedClass =
MI.getDesc().getSchedClass();
1168 if (InstrItins && !InstrItins->
isEmpty()) {
1171 InstrItins->
endStage(SchedClass))) {
1174 Resources[FuncUnits]++;
1189 if (!PRE.ReleaseAtCycle)
1191 Resources[PRE.ProcResourceIdx]++;
1195 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1201 unsigned MFUs1 = minFuncUnits(IS1, F1);
1202 unsigned MFUs2 = minFuncUnits(IS2, F2);
1205 return MFUs1 > MFUs2;
1210class HighRegisterPressureDetector {
1216 const unsigned PSetNum;
1222 std::vector<unsigned> InitSetPressure;
1226 std::vector<unsigned> PressureSetLimit;
1233 using OrderedInstsTy = std::vector<MachineInstr *>;
1237 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1238 if (Pressures.size() == 0) {
1242 for (
unsigned P : Pressures) {
1252 for (
auto PSetIter =
MRI.getPressureSets(
Reg); PSetIter.isValid();
1254 dbgs() << *PSetIter <<
' ';
1259 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1261 auto PSetIter =
MRI.getPressureSets(
Reg);
1262 unsigned Weight = PSetIter.getWeight();
1263 for (; PSetIter.isValid(); ++PSetIter)
1264 Pressure[*PSetIter] += Weight;
1267 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1269 auto PSetIter =
MRI.getPressureSets(
Reg);
1270 unsigned Weight = PSetIter.getWeight();
1271 for (; PSetIter.isValid(); ++PSetIter) {
1272 auto &
P = Pressure[*PSetIter];
1274 "register pressure must be greater than or equal weight");
1281 return Reg.isPhysical() &&
TRI->isFixedRegister(MF,
Reg.asMCReg());
1285 return Reg.isVirtual() &&
MRI.getVRegDef(
Reg)->getParent() == OrigMBB;
1296 void computeLiveIn() {
1298 for (
auto &
MI : *OrigMBB) {
1299 if (
MI.isDebugInstr())
1307 if (isFixedRegister(
Reg))
1309 if (isDefinedInThisLoop(
Reg))
1315 for (
auto LiveIn : Used)
1316 increaseRegisterPressure(InitSetPressure, LiveIn);
1321 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1322 PressureSetLimit[PSet] =
TRI->getRegPressureSetLimit(MF, PSet);
1330 if (isFixedRegister(
Reg))
1335 for (
auto Reg : FixedRegs) {
1337 const int *Sets =
TRI->getRegUnitPressureSets(
Reg);
1338 for (; *Sets != -1; Sets++) {
1339 dbgs() <<
TRI->getRegPressureSetName(*Sets) <<
", ";
1345 for (
auto Reg : FixedRegs) {
1348 auto PSetIter =
MRI.getPressureSets(
Reg);
1349 unsigned Weight = PSetIter.getWeight();
1350 for (; PSetIter.isValid(); ++PSetIter) {
1351 unsigned &Limit = PressureSetLimit[*PSetIter];
1352 assert(Limit >= Weight &&
1353 "register pressure limit must be greater than or equal weight");
1356 <<
" (decreased by " << Weight <<
")\n");
1372 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1373 Instr2StageTy &Stages)
const {
1379 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1380 if (isDefinedInThisLoop(
Reg))
1386 UpdateTargetRegs(
Reg);
1388 for (
auto &
Use : ROMap.
find(
MI)->getSecond().Uses)
1389 UpdateTargetRegs(
Use.RegUnit);
1394 return Stages[
MI] +
MI->isPHI();
1399 for (
auto &
Use : ROMap.
find(
MI)->getSecond().Uses) {
1403 auto Ite = LastUseMI.
find(
Reg);
1404 if (Ite == LastUseMI.
end()) {
1405 LastUseMI[
Reg] =
MI;
1409 if (InstrScore(Orig) < InstrScore(New))
1410 LastUseMI[
Reg] = New;
1415 Instr2LastUsesTy LastUses;
1416 for (
auto &Entry : LastUseMI)
1433 std::vector<unsigned>
1434 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1435 Instr2StageTy &Stages,
1436 const unsigned StageCount)
const {
1443 auto CurSetPressure = InitSetPressure;
1444 auto MaxSetPressure = InitSetPressure;
1445 auto LastUses = computeLastUses(OrderedInsts, Stages);
1448 dbgs() <<
"Ordered instructions:\n";
1450 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1455 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1457 if (!
Reg.isValid() || isFixedRegister(
Reg))
1465 increaseRegisterPressure(CurSetPressure,
Reg);
1469 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1471 if (!
Reg.isValid() || isFixedRegister(
Reg))
1475 if (!RegSet.contains(
Reg))
1480 decreaseRegisterPressure(CurSetPressure,
Reg);
1484 for (
unsigned I = 0;
I < StageCount;
I++) {
1486 const auto Stage = Stages[
MI];
1490 const unsigned Iter =
I - Stage;
1492 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1493 InsertReg(LiveRegSets[Iter],
Def.RegUnit);
1495 for (
auto LastUse : LastUses[
MI]) {
1498 EraseReg(LiveRegSets[Iter - 1], LastUse);
1500 EraseReg(LiveRegSets[Iter], LastUse);
1504 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1505 MaxSetPressure[PSet] =
1506 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1509 dbgs() <<
"CurSetPressure=";
1510 dumpRegisterPressures(CurSetPressure);
1511 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1517 return MaxSetPressure;
1523 : OrigMBB(OrigMBB), MF(MF),
MRI(MF.getRegInfo()),
1524 TRI(MF.getSubtarget().getRegisterInfo()),
1525 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1526 PressureSetLimit(PSetNum, 0) {}
1532 if (
MI.isDebugInstr())
1534 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1538 computePressureSetLimit(RCI);
1544 const unsigned MaxStage)
const {
1546 "the percentage of the margin must be between 0 to 100");
1548 OrderedInstsTy OrderedInsts;
1549 Instr2StageTy Stages;
1551 const auto MaxSetPressure =
1552 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1555 dbgs() <<
"Dump MaxSetPressure:\n";
1556 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1557 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1562 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1563 unsigned Limit = PressureSetLimit[PSet];
1566 <<
" Margin=" << Margin <<
"\n");
1567 if (Limit < MaxSetPressure[PSet] + Margin) {
1570 <<
"Rejected the schedule because of too high register pressure\n");
1586unsigned SwingSchedulerDAG::calculateResMII() {
1589 return RM.calculateResMII();
1598unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1599 unsigned RecMII = 0;
1601 for (
NodeSet &Nodes : NodeSets) {
1605 unsigned Delay = Nodes.getLatency();
1606 unsigned Distance = 1;
1609 unsigned CurMII = (Delay + Distance - 1) / Distance;
1610 Nodes.setRecMII(CurMII);
1611 if (CurMII > RecMII)
1622 for (
SUnit &SU : SUnits) {
1625 DepsAdded.
push_back(std::make_pair(&SU, Pred));
1627 for (std::pair<SUnit *, SDep> &
P : DepsAdded) {
1631 SUnit *TargetSU =
D.getSUnit();
1632 unsigned Reg =
D.getReg();
1633 unsigned Lat =
D.getLatency();
1642void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1646 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1649 for (
auto &SI : SUnits[i].Succs) {
1653 int N =
SI.getSUnit()->NodeNum;
1655 auto Dep = OutputDeps.
find(BackEdge);
1656 if (Dep != OutputDeps.
end()) {
1657 BackEdge = Dep->second;
1658 OutputDeps.
erase(Dep);
1660 OutputDeps[
N] = BackEdge;
1664 if (
SI.getSUnit()->isBoundaryNode() ||
SI.isArtificial() ||
1665 (
SI.getKind() ==
SDep::Anti && !
SI.getSUnit()->getInstr()->isPHI()))
1667 int N =
SI.getSUnit()->NodeNum;
1669 AdjK[i].push_back(
N);
1675 for (
auto &PI : SUnits[i].Preds) {
1676 if (!SUnits[i].getInstr()->mayStore() ||
1679 if (PI.getKind() ==
SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1680 int N = PI.getSUnit()->NodeNum;
1682 AdjK[i].push_back(
N);
1689 for (
auto &OD : OutputDeps)
1690 if (!
Added.test(OD.second)) {
1691 AdjK[OD.first].push_back(OD.second);
1692 Added.set(OD.second);
1698bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
1705 for (
auto W : AdjK[V]) {
1706 if (NumPaths > MaxPaths)
1716 }
else if (!Blocked.test(W)) {
1717 if (circuit(W, S, NodeSets,
1718 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
1726 for (
auto W : AdjK[V]) {
1737void SwingSchedulerDAG::Circuits::unblock(
int U) {
1740 while (!BU.
empty()) {
1742 assert(SI != BU.
end() &&
"Invalid B set.");
1745 if (Blocked.test(
W->NodeNum))
1746 unblock(
W->NodeNum);
1752void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1757 Circuits Cir(
SUnits, Topo);
1759 Cir.createAdjacencyStructure(
this);
1760 for (
int i = 0, e =
SUnits.size(); i != e; ++i) {
1762 Cir.circuit(i, i, NodeSets);
1798 for (
auto &Dep : SU.
Preds) {
1799 SUnit *TmpSU = Dep.getSUnit();
1811 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
1819 for (
auto &Dep : PHISUs[
Index]->Succs) {
1823 SUnit *TmpSU = Dep.getSUnit();
1833 if (UseSUs.
size() == 0)
1838 for (
auto *
I : UseSUs) {
1839 for (
auto *Src : SrcSUs) {
1853 if (
D.isArtificial() ||
D.getSUnit()->isBoundaryNode())
1864void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1865 ScheduleInfo.resize(
SUnits.size());
1868 for (
int I : Topo) {
1876 for (
int I : Topo) {
1878 int zeroLatencyDepth = 0;
1882 if (
P.getLatency() == 0)
1887 asap = std::max(asap, (
int)(
getASAP(
pred) +
P.getLatency() -
1890 maxASAP = std::max(maxASAP, asap);
1891 ScheduleInfo[
I].ASAP = asap;
1892 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
1898 int zeroLatencyHeight = 0;
1913 ScheduleInfo[
I].ALAP = alap;
1914 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
1919 I.computeNodeSetInfo(
this);
1922 for (
unsigned i = 0; i <
SUnits.size(); i++) {
1923 dbgs() <<
"\tNode " << i <<
":\n";
1944 if (S && S->count(Pred.
getSUnit()) == 0)
1955 if (S && S->count(Succ.
getSUnit()) == 0)
1961 return !Preds.
empty();
1973 if (S && S->count(Succ.
getSUnit()) == 0)
1983 if (S && S->count(Pred.
getSUnit()) == 0)
1989 return !Succs.
empty();
2004 if (!Visited.
insert(Cur).second)
2005 return Path.contains(Cur);
2006 bool FoundPath =
false;
2007 for (
auto &SI : Cur->
Succs)
2010 computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
2011 for (
auto &PI : Cur->
Preds)
2014 computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
2029 for (
SUnit *SU : NS) {
2035 if (Reg.isVirtual())
2037 else if (
MRI.isAllocatable(Reg))
2042 for (
SUnit *SU : NS)
2046 if (Reg.isVirtual()) {
2047 if (!
Uses.count(Reg))
2050 }
else if (
MRI.isAllocatable(Reg)) {
2052 if (!
Uses.count(Unit))
2062void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2063 for (
auto &NS : NodeSets) {
2069 RecRPTracker.init(&
MF, &RegClassInfo, &LIS,
BB,
BB->
end(),
false,
true);
2071 RecRPTracker.closeBottom();
2073 std::vector<SUnit *>
SUnits(NS.begin(), NS.end());
2075 return A->NodeNum >
B->NodeNum;
2078 for (
auto &SU :
SUnits) {
2084 RecRPTracker.setPos(std::next(CurInstI));
2088 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2093 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2096 NS.setExceedPressure(SU);
2099 RecRPTracker.recede();
2106void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2107 unsigned Colocate = 0;
2108 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2113 for (
int j = i + 1;
j <
e; ++
j) {
2134void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2139 for (
auto &NS : NodeSets) {
2140 if (NS.getRecMII() > 2)
2142 if (NS.getMaxDepth() > MII)
2151void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2179 NodesAdded.
insert(
I.begin(),
I.end());
2188 addConnectedNodes(
I, NewSet, NodesAdded);
2189 if (!NewSet.
empty())
2190 NodeSets.push_back(NewSet);
2197 addConnectedNodes(
I, NewSet, NodesAdded);
2198 if (!NewSet.
empty())
2199 NodeSets.push_back(NewSet);
2204 if (NodesAdded.
count(&SU) == 0) {
2206 addConnectedNodes(&SU, NewSet, NodesAdded);
2207 if (!NewSet.
empty())
2208 NodeSets.push_back(NewSet);
2214void SwingSchedulerDAG::addConnectedNodes(
SUnit *SU,
NodeSet &NewSet,
2218 for (
auto &SI : SU->
Succs) {
2220 if (!
SI.isArtificial() && !
Successor->isBoundaryNode() &&
2222 addConnectedNodes(
Successor, NewSet, NodesAdded);
2224 for (
auto &PI : SU->
Preds) {
2225 SUnit *Predecessor = PI.getSUnit();
2226 if (!PI.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2227 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2236 for (
SUnit *SU : Set1) {
2237 if (Set2.
count(SU) != 0)
2240 return !Result.empty();
2244void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2253 for (
SUnit *SU : *J)
2265void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2269 J->remove_if([&](
SUnit *SUJ) {
return I->count(SUJ); });
2284void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2288 for (
auto &Nodes : NodeSets) {
2293 R.insert(
N.begin(),
N.end());
2297 R.insert(
N.begin(),
N.end());
2305 }
else if (NodeSets.size() == 1) {
2306 for (
const auto &
N : Nodes)
2307 if (
N->Succs.size() == 0)
2313 SUnit *maxASAP =
nullptr;
2314 for (
SUnit *SU : Nodes) {
2324 while (!
R.empty()) {
2325 if (Order == TopDown) {
2329 while (!
R.empty()) {
2330 SUnit *maxHeight =
nullptr;
2343 NodeOrder.insert(maxHeight);
2345 R.remove(maxHeight);
2346 for (
const auto &
I : maxHeight->
Succs) {
2347 if (Nodes.count(
I.getSUnit()) == 0)
2349 if (NodeOrder.contains(
I.getSUnit()))
2353 R.insert(
I.getSUnit());
2356 for (
const auto &
I : maxHeight->
Preds) {
2359 if (Nodes.count(
I.getSUnit()) == 0)
2361 if (NodeOrder.contains(
I.getSUnit()))
2363 R.insert(
I.getSUnit());
2369 if (
pred_L(NodeOrder,
N, &Nodes))
2370 R.insert(
N.begin(),
N.end());
2375 while (!
R.empty()) {
2376 SUnit *maxDepth =
nullptr;
2388 NodeOrder.insert(maxDepth);
2391 if (Nodes.isExceedSU(maxDepth)) {
2394 R.insert(Nodes.getNode(0));
2397 for (
const auto &
I : maxDepth->
Preds) {
2398 if (Nodes.count(
I.getSUnit()) == 0)
2400 if (NodeOrder.contains(
I.getSUnit()))
2402 R.insert(
I.getSUnit());
2405 for (
const auto &
I : maxDepth->
Succs) {
2408 if (Nodes.count(
I.getSUnit()) == 0)
2410 if (NodeOrder.contains(
I.getSUnit()))
2412 R.insert(
I.getSUnit());
2418 if (
succ_L(NodeOrder,
N, &Nodes))
2419 R.insert(
N.begin(),
N.end());
2426 dbgs() <<
"Node order: ";
2427 for (
SUnit *
I : NodeOrder)
2428 dbgs() <<
" " <<
I->NodeNum <<
" ";
2435bool SwingSchedulerDAG::schedulePipeline(
SMSchedule &Schedule) {
2437 if (NodeOrder.empty()){
2442 bool scheduleFound =
false;
2443 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2447 HRPDetector->init(RegClassInfo);
2450 for (
unsigned II = MII;
II <= MAX_II && !scheduleFound; ++
II) {
2462 int EarlyStart = INT_MIN;
2463 int LateStart = INT_MAX;
2466 int SchedEnd = INT_MAX;
2467 int SchedStart = INT_MIN;
2468 Schedule.
computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2477 dbgs() <<
format(
"\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
2478 LateStart, SchedEnd, SchedStart);
2481 if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2482 SchedStart > LateStart)
2483 scheduleFound =
false;
2484 else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2485 SchedEnd = std::min(SchedEnd, EarlyStart + (
int)
II - 1);
2486 scheduleFound = Schedule.
insert(SU, EarlyStart, SchedEnd,
II);
2487 }
else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2488 SchedStart = std::max(SchedStart, LateStart - (
int)
II + 1);
2489 scheduleFound = Schedule.
insert(SU, LateStart, SchedStart,
II);
2490 }
else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2492 std::min(SchedEnd, std::min(LateStart, EarlyStart + (
int)
II - 1));
2497 scheduleFound = Schedule.
insert(SU, SchedEnd, EarlyStart,
II);
2499 scheduleFound = Schedule.
insert(SU, EarlyStart, SchedEnd,
II);
2502 scheduleFound = Schedule.
insert(SU, FirstCycle +
getASAP(SU),
2510 scheduleFound =
false;
2514 dbgs() <<
"\tCan't schedule\n";
2516 }
while (++NI != NE && scheduleFound);
2538 if (scheduleFound) {
2544 if (scheduleFound) {
2546 Pass.ORE->emit([&]() {
2549 <<
"Schedule found with Initiation Interval: "
2551 <<
", MaxStageCount: "
2562bool SwingSchedulerDAG::computeDelta(
MachineInstr &
MI,
unsigned &Delta) {
2566 bool OffsetIsScalable;
2571 if (OffsetIsScalable)
2574 if (!BaseOp->
isReg())
2582 if (BaseDef && BaseDef->
isPHI()) {
2606 unsigned &OffsetPos,
2612 unsigned BasePosLd, OffsetPosLd;
2615 Register BaseReg =
MI->getOperand(BasePosLd).getReg();
2620 if (!Phi || !
Phi->isPHI())
2629 if (!PrevDef || PrevDef ==
MI)
2635 unsigned BasePos1 = 0, OffsetPos1 = 0;
2641 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
2651 BasePos = BasePosLd;
2652 OffsetPos = OffsetPosLd;
2664 InstrChanges.find(SU);
2665 if (It != InstrChanges.end()) {
2666 std::pair<unsigned, int64_t> RegAndOffset = It->second;
2667 unsigned BasePos, OffsetPos;
2670 Register BaseReg =
MI->getOperand(BasePos).getReg();
2676 if (BaseStageNum < DefStageNum) {
2678 int OffsetDiff = DefStageNum - BaseStageNum;
2679 if (DefCycleNum < BaseCycleNum) {
2685 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2700 while (Def->isPHI()) {
2701 if (!Visited.
insert(Def).second)
2703 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2704 if (Def->getOperand(i + 1).getMBB() ==
BB) {
2731 assert(SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
2745 unsigned DeltaS, DeltaD;
2746 if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2750 int64_t OffsetS, OffsetD;
2751 bool OffsetSIsScalable, OffsetDIsScalable;
2759 assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2760 "Expected offsets to be byte offsets");
2764 if (!DefS || !DefD || !DefS->
isPHI() || !DefD->
isPHI())
2767 unsigned InitValS = 0;
2768 unsigned LoopValS = 0;
2769 unsigned InitValD = 0;
2770 unsigned LoopValD = 0;
2794 if (DeltaS != DeltaD || DeltaS < AccessSizeS.
getValue() ||
2798 return (OffsetS + (int64_t)AccessSizeS.
getValue() <
2799 OffsetD + (int64_t)AccessSizeD.
getValue());
2802void SwingSchedulerDAG::postProcessDAG() {
2803 for (
auto &M : Mutations)
2813 bool forward =
true;
2815 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
2816 << EndCycle <<
" II: " <<
II <<
"\n";
2818 if (StartCycle > EndCycle)
2822 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2823 for (
int curCycle = StartCycle; curCycle != termCycle;
2824 forward ? ++curCycle : --curCycle) {
2829 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
2834 ProcItinResources.reserveResources(*SU, curCycle);
2835 ScheduledInstrs[curCycle].push_back(SU);
2836 InstrToCycle.insert(std::make_pair(SU, curCycle));
2837 if (curCycle > LastCycle)
2838 LastCycle = curCycle;
2839 if (curCycle < FirstCycle)
2840 FirstCycle = curCycle;
2844 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
2856 int EarlyCycle = INT_MAX;
2857 while (!Worklist.
empty()) {
2860 if (Visited.
count(PrevSU))
2862 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2863 if (it == InstrToCycle.end())
2865 EarlyCycle = std::min(EarlyCycle, it->second);
2866 for (
const auto &PI : PrevSU->
Preds)
2879 int LateCycle = INT_MIN;
2880 while (!Worklist.
empty()) {
2885 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2886 if (it == InstrToCycle.end())
2888 LateCycle = std::max(LateCycle, it->second);
2889 for (
const auto &SI : SuccSU->
Succs)
2901 for (
auto &
P : SU->
Preds)
2902 if (DAG->
isBackedge(SU,
P) &&
P.getSUnit()->getInstr()->isPHI())
2903 for (
auto &S :
P.getSUnit()->Succs)
2905 return P.getSUnit();
2912 int *MinEnd,
int *MaxStart,
int II,
2917 for (
int cycle =
getFirstCycle(); cycle <= LastCycle; ++cycle) {
2923 for (
unsigned i = 0, e = (
unsigned)SU->
Preds.size(); i != e; ++i) {
2929 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2932 *MinEnd = std::min(*MinEnd,
End);
2937 *MinLateStart = std::min(*MinLateStart, LateStart);
2945 *MinLateStart = std::min(*MinLateStart, cycle);
2947 for (
unsigned i = 0, e = (
unsigned)SU->
Succs.size(); i != e; ++i) {
2948 if (SU->
Succs[i].getSUnit() ==
I) {
2953 *MinLateStart = std::min(*MinLateStart, LateStart);
2956 *MaxStart = std::max(*MaxStart, Start);
2961 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2973 std::deque<SUnit *> &Insts)
const {
2975 bool OrderBeforeUse =
false;
2976 bool OrderAfterDef =
false;
2977 bool OrderBeforeDef =
false;
2978 unsigned MoveDef = 0;
2979 unsigned MoveUse = 0;
2983 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
2986 if (!MO.isReg() || !MO.getReg().isVirtual())
2990 unsigned BasePos, OffsetPos;
2991 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
2992 if (
MI->getOperand(BasePos).getReg() == Reg)
2996 std::tie(Reads,
Writes) =
2997 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2999 OrderBeforeUse =
true;
3004 OrderAfterDef =
true;
3008 OrderBeforeUse =
true;
3012 OrderAfterDef =
true;
3016 OrderBeforeUse =
true;
3020 OrderAfterDef =
true;
3025 OrderBeforeUse =
true;
3031 OrderBeforeDef =
true;
3038 for (
auto &S : SU->
Succs) {
3042 OrderBeforeUse =
true;
3050 OrderBeforeUse =
true;
3051 if ((MoveUse == 0) || (Pos < MoveUse))
3055 for (
auto &
P : SU->
Preds) {
3056 if (
P.getSUnit() != *
I)
3059 OrderAfterDef =
true;
3066 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3067 OrderBeforeUse =
false;
3072 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3076 if (OrderBeforeUse && OrderAfterDef) {
3077 SUnit *UseSU = Insts.at(MoveUse);
3078 SUnit *DefSU = Insts.at(MoveDef);
3079 if (MoveUse > MoveDef) {
3080 Insts.erase(Insts.begin() + MoveUse);
3081 Insts.erase(Insts.begin() + MoveDef);
3083 Insts.erase(Insts.begin() + MoveDef);
3084 Insts.erase(Insts.begin() + MoveUse);
3094 Insts.push_front(SU);
3096 Insts.push_back(SU);
3104 assert(Phi.isPHI() &&
"Expecting a Phi.");
3109 unsigned InitVal = 0;
3110 unsigned LoopVal = 0;
3111 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3119 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3137 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3143 if (DMO.getReg() == LoopReg)
3155 for (
auto &SU : SSD->
SUnits)
3159 while (!Worklist.
empty()) {
3161 if (DoNotPipeline.
count(SU))
3164 DoNotPipeline.
insert(SU);
3165 for (
auto &Dep : SU->
Preds)
3168 for (
auto &Dep : SU->
Succs)
3172 return DoNotPipeline;
3181 int NewLastCycle = INT_MIN;
3186 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3192 for (
auto &Dep : SU.
Preds)
3193 NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
3195 int OldCycle = InstrToCycle[&SU];
3196 if (OldCycle != NewCycle) {
3197 InstrToCycle[&SU] = NewCycle;
3202 <<
") is not pipelined; moving from cycle " << OldCycle
3203 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3205 NewLastCycle = std::max(NewLastCycle, NewCycle);
3207 LastCycle = NewLastCycle;
3224 int CycleDef = InstrToCycle[&SU];
3225 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3226 for (
auto &SI : SU.
Succs)
3227 if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
3231 if (InstrToCycle[SI.getSUnit()] <= CycleDef)
3248void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3251 typedef std::pair<SUnit *, unsigned> UnitIndex;
3252 std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(
nullptr, 0));
3254 for (
unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3255 Indices.push_back(std::make_pair(NodeOrder[i], i));
3257 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3258 return std::get<0>(i1) < std::get<0>(i2);
3271 for (
unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3272 SUnit *SU = NodeOrder[i];
3275 bool PredBefore =
false;
3276 bool SuccBefore =
false;
3285 unsigned PredIndex = std::get<1>(
3301 unsigned SuccIndex = std::get<1>(
3314 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3319 NumNodeOrderIssues++;
3323 <<
" are scheduled before node " << SU->
NodeNum
3330 dbgs() <<
"Invalid node order found!\n";
3341 unsigned OverlapReg = 0;
3342 unsigned NewBaseReg = 0;
3343 for (
SUnit *SU : Instrs) {
3345 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3353 InstrChanges.find(SU);
3354 if (It != InstrChanges.end()) {
3355 unsigned BasePos, OffsetPos;
3361 MI->getOperand(OffsetPos).getImm() - It->second.second;
3374 unsigned TiedUseIdx = 0;
3375 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3377 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3379 NewBaseReg =
MI->getOperand(i).getReg();
3388 const std::deque<SUnit *> &Instrs)
const {
3389 std::deque<SUnit *> NewOrderPhi;
3390 for (
SUnit *SU : Instrs) {
3392 NewOrderPhi.push_back(SU);
3394 std::deque<SUnit *> NewOrderI;
3395 for (
SUnit *SU : Instrs) {
3411 std::deque<SUnit *> &cycleInstrs =
3412 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3414 ScheduledInstrs[cycle].push_front(SU);
3420 for (
int cycle =
getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3421 ScheduledInstrs.erase(cycle);
3431 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3440 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3441 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3442 for (
const auto &
I : Nodes)
3443 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3447#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3454 for (
SUnit *CI : cycleInstrs->second) {
3456 os <<
"(" << CI->
NodeNum <<
") ";
3467void ResourceManager::dumpMRT()
const {
3471 std::stringstream SS;
3473 SS << std::setw(4) <<
"Slot";
3475 SS << std::setw(3) <<
I;
3476 SS << std::setw(7) <<
"#Mops"
3478 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3479 SS << std::setw(4) << Slot;
3481 SS << std::setw(3) << MRT[Slot][
I];
3482 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3491 unsigned ProcResourceID = 0;
3496 "Too many kinds of resources, unsupported");
3502 if (
Desc.SubUnitsIdxBegin)
3504 Masks[
I] = 1ULL << ProcResourceID;
3510 if (!
Desc.SubUnitsIdxBegin)
3512 Masks[
I] = 1ULL << ProcResourceID;
3513 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3514 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3519 dbgs() <<
"ProcResourceDesc:\n";
3522 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3523 ProcResource->
Name,
I, Masks[
I],
3526 dbgs() <<
" -----------------\n";
3534 dbgs() <<
"canReserveResources:\n";
3537 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3543 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3549 reserveResources(SCDesc,
Cycle);
3550 bool Result = !isOverbooked();
3551 unreserveResources(SCDesc,
Cycle);
3560 dbgs() <<
"reserveResources:\n";
3563 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3569 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3575 reserveResources(SCDesc,
Cycle);
3580 dbgs() <<
"reserveResources: done!\n\n";
3591 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3594 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3603 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3606 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3609bool ResourceManager::isOverbooked()
const {
3611 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
3614 if (MRT[Slot][
I] >
Desc->NumUnits)
3617 if (NumScheduledMops[Slot] > IssueWidth)
3623int ResourceManager::calculateResMIIDFA()
const {
3628 FuncUnitSorter FUS = FuncUnitSorter(*ST);
3630 FUS.calcCriticalResources(*SU.
getInstr());
3641 while (!FuncUnitOrder.empty()) {
3643 FuncUnitOrder.pop();
3650 unsigned ReservedCycles = 0;
3651 auto *RI = Resources.
begin();
3652 auto *RE = Resources.
end();
3654 dbgs() <<
"Trying to reserve resource for " << NumCycles
3655 <<
" cycles for \n";
3658 for (
unsigned C = 0;
C < NumCycles; ++
C)
3660 if ((*RI)->canReserveResources(*
MI)) {
3661 (*RI)->reserveResources(*
MI);
3668 <<
", NumCycles:" << NumCycles <<
"\n");
3670 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
3672 <<
"NewResource created to reserve resources"
3675 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
3676 NewResource->reserveResources(*
MI);
3677 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
3681 int Resmii = Resources.
size();
3688 return calculateResMIIDFA();
3707 <<
" WriteProcRes: ";
3718 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
3721 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
3726 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
3729 dbgs() <<
"#Mops: " << NumMops <<
", "
3730 <<
"IssueWidth: " << IssueWidth <<
", "
3731 <<
"Cycles: " << Result <<
"\n";
3736 std::stringstream SS;
3737 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
3738 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
3745 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
3748 std::stringstream SS;
3749 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
3750 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
3751 << std::setw(10) << Cycles <<
"\n";
3755 if (Cycles > Result)
3762 InitiationInterval =
II;
3763 DFAResources.clear();
3764 DFAResources.resize(
II);
3765 for (
auto &
I : DFAResources)
3766 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
3769 NumScheduledMops.
clear();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file declares an analysis pass that computes CycleInfo for LLVM IR, specialized from GenericCycl...
This file defines the DenseMap class.
SmallVector< uint32_t, 0 > Writes
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Modulo Software Pipelining
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static bool isDependenceBarrier(MachineInstr &MI)
Return true if the instruction causes a chain between memory references before and after it.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
static unsigned getSize(unsigned Kind)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
Implements a dense probed hash-table based set.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
A possibly irreducible generalization of a Loop.
Itinerary data supplied by a subtarget to be used by a target.
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
BlockT * getHeader() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Generic base class for all target subtargets.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
const MachineOperand & getOperand(unsigned i) const
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
static use_instr_iterator use_instr_end()
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
Pass interface - Implemented by all 'passes'.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Track the current register pressure at some position in the instruction stream, and remember the high...
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Kind
These are the different kinds of scheduling dependencies.
@ Output
A register output-dependence (aka WAW).
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void dumpNode(const SUnit &SU) const override
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void dump() const override
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
MachineFunction & MF
Machine function.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
typename vector_type::const_iterator iterator
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
unsigned getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Object returned by analyzeLoopForPipelining.
virtual bool isMVEExpanderSupported()
Return true if the target can expand pipelined schedule with modulo variable expansion.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS)
Return true if the proposed schedule should used.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static Type * getVoidTy(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
The main class in the implementation of the target independent window scheduler.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
@ WS_Force
Use window algorithm after SMS algorithm fails.
@ WS_On
Turn off window algorithm.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
cl::opt< bool > SwpEnableCopyToPhi
cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Description of the encoding of one expression Op.
These values represent a non-pipelined step in the execution of an instruction.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
static constexpr LaneBitmask getNone()
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
unsigned getNumProcResourceKinds() const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineInstr * LoopInductionVar
SmallVector< MachineOperand, 4 > BrCond
MachineInstr * LoopCompare
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
const TargetPassConfig * PassConfig
const MachineDominatorTree * MDT
RegisterClassInfo * RegClassInfo
const MachineLoopInfo * MLI
Store the effects of a change in pressure on things that MI scheduler cares about.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.