73#include "llvm/Config/llvm-config.h"
101#define DEBUG_TYPE "pipeliner"
103STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
104STATISTIC(NumPipelined,
"Number of loops software pipelined");
105STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
106STATISTIC(NumFailBranch,
"Pipeliner abort due to unknown branch");
107STATISTIC(NumFailLoop,
"Pipeliner abort due to unsupported loop");
108STATISTIC(NumFailPreheader,
"Pipeliner abort due to missing preheader");
109STATISTIC(NumFailLargeMaxMII,
"Pipeliner abort due to MaxMII too large");
110STATISTIC(NumFailZeroMII,
"Pipeliner abort due to zero MII");
111STATISTIC(NumFailNoSchedule,
"Pipeliner abort due to no schedule found");
112STATISTIC(NumFailZeroStage,
"Pipeliner abort due to zero stage");
113STATISTIC(NumFailLargeMaxStage,
"Pipeliner abort due to too many stages");
117 cl::desc(
"Enable Software Pipelining"));
126 cl::desc(
"Size limit for the MII."),
132 cl::desc(
"Force pipeliner to use specified II."),
138 cl::desc(
"Maximum stages allowed in the generated scheduled."),
145 cl::desc(
"Prune dependences between unrelated Phi nodes."),
152 cl::desc(
"Prune loop carried order dependences."),
170 cl::desc(
"Instead of emitting the pipelined code, annotate instructions "
171 "with the generated schedule for feeding into the "
172 "-modulo-schedule-test pass"));
177 "Use the experimental peeling code generator for software pipelining"));
185 cl::desc(
"Limit register pressure of scheduled loop"));
190 cl::desc(
"Margin representing the unused percentage of "
191 "the register pressure limit"));
195 cl::desc(
"Use the MVE code generator for software pipelining"));
202 cl::desc(
"Enable CopyToPhi DAG Mutation"));
207 "pipeliner-force-issue-width",
214 cl::desc(
"Set how to use window scheduling algorithm."),
216 "Turn off window algorithm."),
218 "Use window algorithm after SMS algorithm fails."),
220 "Use window algorithm instead of SMS algorithm.")));
224unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
232 "Modulo Software Pipelining",
false,
false)
242 if (skipFunction(mf.getFunction()))
248 if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
252 if (!mf.getSubtarget().enableMachinePipeliner())
257 if (mf.getSubtarget().useDFAforSMS() &&
258 (!mf.getSubtarget().getInstrItineraryData() ||
259 mf.getSubtarget().getInstrItineraryData()->isEmpty()))
263 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
264 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
265 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
266 TII = MF->getSubtarget().getInstrInfo();
267 RegClassInfo.runOnMachineFunction(*MF);
269 for (
const auto &L : *MLI)
279bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
280 bool Changed =
false;
281 for (
const auto &InnerLoop : L)
282 Changed |= scheduleLoop(*InnerLoop);
294 setPragmaPipelineOptions(L);
295 if (!canPipelineLoop(L)) {
299 L.getStartLoc(), L.getHeader())
300 <<
"Failed to pipeline loop";
308 if (useSwingModuloScheduler())
309 Changed = swingModuloScheduler(L);
311 if (useWindowScheduler(Changed))
312 Changed = runWindowScheduler(L);
318void MachinePipeliner::setPragmaPipelineOptions(
MachineLoop &L) {
337 if (LoopID ==
nullptr)
344 MDNode *MD = dyn_cast<MDNode>(MDO);
354 if (S->
getString() ==
"llvm.loop.pipeline.initiationinterval") {
356 "Pipeline initiation interval hint metadata should have two operands.");
358 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
360 }
else if (S->
getString() ==
"llvm.loop.pipeline.disable") {
369bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
370 if (
L.getNumBlocks() != 1) {
373 L.getStartLoc(),
L.getHeader())
374 <<
"Not a single basic block: "
375 <<
ore::NV(
"NumBlocks",
L.getNumBlocks());
383 L.getStartLoc(),
L.getHeader())
384 <<
"Disabled by Pragma.";
395 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeBranch, can NOT pipeline Loop\n");
399 L.getStartLoc(),
L.getHeader())
400 <<
"The branch can't be understood";
409 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeLoop, can NOT pipeline Loop\n");
413 L.getStartLoc(),
L.getHeader())
414 <<
"The loop structure is not supported";
419 if (!
L.getLoopPreheader()) {
420 LLVM_DEBUG(
dbgs() <<
"Preheader not found, can NOT pipeline Loop\n");
424 L.getStartLoc(),
L.getHeader())
425 <<
"No loop preheader found";
431 preprocessPhiNodes(*
L.getHeader());
438 *getAnalysis<LiveIntervalsWrapperPass>().getLIS().getSlotIndexes();
443 auto *RC =
MRI.getRegClass(DefOp.
getReg());
445 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
470bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
471 assert(
L.getBlocks().size() == 1 &&
"SMS works on single blocks only.");
474 *
this, L, getAnalysis<LiveIntervalsWrapperPass>().getLIS(),
RegClassInfo,
495 return SMS.hasNewSchedule();
509bool MachinePipeliner::runWindowScheduler(
MachineLoop &L) {
514 Context.
PassConfig = &getAnalysis<TargetPassConfig>();
515 Context.
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
516 Context.
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
522bool MachinePipeliner::useSwingModuloScheduler() {
527bool MachinePipeliner::useWindowScheduler(
bool Changed) {
534 "llvm.loop.pipeline.initiationinterval is set.\n");
542void SwingSchedulerDAG::setMII(
unsigned ResMII,
unsigned RecMII) {
545 else if (II_setByPragma > 0)
546 MII = II_setByPragma;
548 MII = std::max(ResMII, RecMII);
551void SwingSchedulerDAG::setMAX_II() {
554 else if (II_setByPragma > 0)
555 MAX_II = II_setByPragma;
565 addLoopCarriedDependences(AA);
566 updatePhiDependences();
573 findCircuits(NodeSets);
577 unsigned ResMII = calculateResMII();
578 unsigned RecMII = calculateRecMII(NodeSets);
586 setMII(ResMII, RecMII);
590 <<
" (rec=" << RecMII <<
", res=" << ResMII <<
")\n");
596 Pass.ORE->emit([&]() {
599 <<
"Invalid Minimal Initiation Interval: 0";
607 <<
", we don't pipeline large loops\n");
608 NumFailLargeMaxMII++;
609 Pass.ORE->emit([&]() {
612 <<
"Minimal Initiation Interval too large: "
613 <<
ore::NV(
"MII", (
int)MII) <<
" > "
615 <<
"Refer to -pipeliner-max-mii.";
620 computeNodeFunctions(NodeSets);
622 registerPressureFilter(NodeSets);
624 colocateNodeSets(NodeSets);
626 checkNodeSets(NodeSets);
629 for (
auto &
I : NodeSets) {
630 dbgs() <<
" Rec NodeSet ";
637 groupRemainingNodes(NodeSets);
639 removeDuplicateNodes(NodeSets);
642 for (
auto &
I : NodeSets) {
643 dbgs() <<
" NodeSet ";
648 computeNodeOrder(NodeSets);
651 checkValidNodeOrder(Circuits);
654 Scheduled = schedulePipeline(Schedule);
659 Pass.ORE->emit([&]() {
662 <<
"Unable to find schedule";
669 if (numStages == 0) {
672 Pass.ORE->emit([&]() {
675 <<
"No need to pipeline - no overlapped iterations in schedule.";
682 <<
" : too many stages, abort\n");
683 NumFailLargeMaxStage++;
684 Pass.ORE->emit([&]() {
687 <<
"Too many stages in schedule: "
688 <<
ore::NV(
"numStages", (
int)numStages) <<
" > "
690 <<
". Refer to -pipeliner-max-stages.";
695 Pass.ORE->emit([&]() {
698 <<
"Pipelined succesfully!";
703 std::vector<MachineInstr *> OrderedInsts;
707 OrderedInsts.push_back(SU->getInstr());
708 Cycles[SU->getInstr()] =
Cycle;
713 for (
auto &KV : NewMIs) {
714 Cycles[KV.first] = Cycles[KV.second];
715 Stages[KV.first] = Stages[KV.second];
716 NewInstrChanges[KV.first] = InstrChanges[
getSUnit(KV.first)];
723 "Cannot serialize a schedule with InstrChanges!");
747 for (
auto &KV : NewMIs)
758 unsigned &InitVal,
unsigned &LoopVal) {
759 assert(Phi.isPHI() &&
"Expecting a Phi.");
763 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
764 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
765 InitVal = Phi.getOperand(i).getReg();
767 LoopVal = Phi.getOperand(i).getReg();
769 assert(InitVal != 0 && LoopVal != 0 &&
"Unexpected Phi structure.");
775 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
776 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
777 return Phi.getOperand(i).getReg();
786 while (!Worklist.
empty()) {
788 for (
const auto &SI : SU->
Succs) {
789 SUnit *SuccSU = SI.getSUnit();
791 if (Visited.
count(SuccSU))
806 return MI.isCall() ||
MI.mayRaiseFPException() ||
807 MI.hasUnmodeledSideEffects() ||
808 (
MI.hasOrderedMemoryRef() &&
809 (!
MI.mayLoad() || !
MI.isDereferenceableInvariantLoad()));
817 if (!
MI->hasOneMemOperand())
823 for (
const Value *V : Objs) {
835void SwingSchedulerDAG::addLoopCarriedDependences(
AliasAnalysis *AA) {
842 PendingLoads.
clear();
843 else if (
MI.mayLoad()) {
848 for (
const auto *V : Objs) {
852 }
else if (
MI.mayStore()) {
857 for (
const auto *V : Objs) {
859 PendingLoads.
find(V);
860 if (
I == PendingLoads.
end())
862 for (
auto *Load :
I->second) {
870 int64_t Offset1, Offset2;
871 bool Offset1IsScalable, Offset2IsScalable;
873 Offset1IsScalable,
TRI) &&
875 Offset2IsScalable,
TRI)) {
877 Offset1IsScalable == Offset2IsScalable &&
878 (
int)Offset1 < (
int)Offset2) {
880 "What happened to the chain edge?");
931void SwingSchedulerDAG::updatePhiDependences() {
939 unsigned HasPhiUse = 0;
940 unsigned HasPhiDef = 0;
964 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
969 }
else if (MO.isUse()) {
972 if (
DefMI ==
nullptr)
979 ST.adjustSchedDependency(SU, 0, &
I, MO.getOperandNo(), Dep,
986 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
995 for (
auto &PI :
I.Preds) {
998 if (
I.getInstr()->isPHI()) {
1007 for (
const SDep &
D : RemoveDeps)
1014void SwingSchedulerDAG::changeDependences() {
1019 unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
1020 int64_t NewOffset = 0;
1021 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
1026 Register OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
1046 for (
const SDep &
P :
I.Preds)
1047 if (
P.getSUnit() == DefSU)
1049 for (
const SDep &
D : Deps) {
1055 for (
auto &
P : LastSU->
Preds)
1058 for (
const SDep &
D : Deps) {
1071 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
1082 std::vector<MachineInstr *> &OrderedInsts,
1090 Stage <= LastStage; ++Stage) {
1093 Instrs[
Cycle].push_front(SU);
1100 std::deque<SUnit *> &CycleInstrs = Instrs[
Cycle];
1102 for (
SUnit *SU : CycleInstrs) {
1104 OrderedInsts.push_back(
MI);
1114struct FuncUnitSorter {
1120 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1128 unsigned min = UINT_MAX;
1129 if (InstrItins && !InstrItins->
isEmpty()) {
1132 InstrItins->
endStage(SchedClass))) {
1135 if (numAlternatives < min) {
1136 min = numAlternatives;
1153 if (!PRE.ReleaseAtCycle)
1157 unsigned NumUnits = ProcResource->
NumUnits;
1158 if (NumUnits < min) {
1160 F = PRE.ProcResourceIdx;
1165 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1174 unsigned SchedClass =
MI.getDesc().getSchedClass();
1175 if (InstrItins && !InstrItins->
isEmpty()) {
1178 InstrItins->
endStage(SchedClass))) {
1181 Resources[FuncUnits]++;
1196 if (!PRE.ReleaseAtCycle)
1198 Resources[PRE.ProcResourceIdx]++;
1202 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1208 unsigned MFUs1 = minFuncUnits(IS1, F1);
1209 unsigned MFUs2 = minFuncUnits(IS2, F2);
1212 return MFUs1 > MFUs2;
1217class HighRegisterPressureDetector {
1222 const unsigned PSetNum;
1228 std::vector<unsigned> InitSetPressure;
1232 std::vector<unsigned> PressureSetLimit;
1239 using OrderedInstsTy = std::vector<MachineInstr *>;
1243 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1244 if (Pressures.size() == 0) {
1248 for (
unsigned P : Pressures) {
1258 for (
auto PSetIter =
MRI.getPressureSets(
Reg); PSetIter.isValid();
1260 dbgs() << *PSetIter <<
' ';
1265 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1267 auto PSetIter =
MRI.getPressureSets(
Reg);
1268 unsigned Weight = PSetIter.getWeight();
1269 for (; PSetIter.isValid(); ++PSetIter)
1270 Pressure[*PSetIter] += Weight;
1273 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1275 auto PSetIter =
MRI.getPressureSets(
Reg);
1276 unsigned Weight = PSetIter.getWeight();
1277 for (; PSetIter.isValid(); ++PSetIter) {
1278 auto &
P = Pressure[*PSetIter];
1280 "register pressure must be greater than or equal weight");
1287 return Reg.isPhysical() &&
MRI.isReserved(
Reg.asMCReg());
1291 return Reg.isVirtual() &&
MRI.getVRegDef(
Reg)->getParent() == OrigMBB;
1302 void computeLiveIn() {
1304 for (
auto &
MI : *OrigMBB) {
1305 if (
MI.isDebugInstr())
1313 if (isReservedRegister(
Reg))
1315 if (isDefinedInThisLoop(
Reg))
1321 for (
auto LiveIn : Used)
1322 increaseRegisterPressure(InitSetPressure, LiveIn);
1327 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1342 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1343 Instr2StageTy &Stages)
const {
1349 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1350 if (isDefinedInThisLoop(
Reg))
1356 UpdateTargetRegs(
Reg);
1358 for (
auto &
Use : ROMap.
find(
MI)->getSecond().Uses)
1359 UpdateTargetRegs(
Use.RegUnit);
1364 return Stages[
MI] +
MI->isPHI();
1369 for (
auto &
Use : ROMap.
find(
MI)->getSecond().Uses) {
1377 if (InstrScore(Orig) < InstrScore(New))
1383 Instr2LastUsesTy LastUses;
1384 for (
auto &Entry : LastUseMI)
1401 std::vector<unsigned>
1402 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1403 Instr2StageTy &Stages,
1404 const unsigned StageCount)
const {
1411 auto CurSetPressure = InitSetPressure;
1412 auto MaxSetPressure = InitSetPressure;
1413 auto LastUses = computeLastUses(OrderedInsts, Stages);
1416 dbgs() <<
"Ordered instructions:\n";
1418 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1423 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1425 if (!
Reg.isValid() || isReservedRegister(
Reg))
1433 increaseRegisterPressure(CurSetPressure,
Reg);
1437 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1439 if (!
Reg.isValid() || isReservedRegister(
Reg))
1443 if (!RegSet.contains(
Reg))
1448 decreaseRegisterPressure(CurSetPressure,
Reg);
1452 for (
unsigned I = 0;
I < StageCount;
I++) {
1454 const auto Stage = Stages[
MI];
1458 const unsigned Iter =
I - Stage;
1460 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1461 InsertReg(LiveRegSets[Iter],
Def.RegUnit);
1463 for (
auto LastUse : LastUses[
MI]) {
1466 EraseReg(LiveRegSets[Iter - 1], LastUse);
1468 EraseReg(LiveRegSets[Iter], LastUse);
1472 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1473 MaxSetPressure[PSet] =
1474 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1477 dbgs() <<
"CurSetPressure=";
1478 dumpRegisterPressures(CurSetPressure);
1479 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1485 return MaxSetPressure;
1491 : OrigMBB(OrigMBB),
MRI(MF.getRegInfo()),
1492 TRI(MF.getSubtarget().getRegisterInfo()),
1493 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1494 PressureSetLimit(PSetNum, 0) {}
1500 if (
MI.isDebugInstr())
1502 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1506 computePressureSetLimit(RCI);
1512 const unsigned MaxStage)
const {
1514 "the percentage of the margin must be between 0 to 100");
1516 OrderedInstsTy OrderedInsts;
1517 Instr2StageTy Stages;
1519 const auto MaxSetPressure =
1520 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1523 dbgs() <<
"Dump MaxSetPressure:\n";
1524 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1525 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1530 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1531 unsigned Limit = PressureSetLimit[PSet];
1534 <<
" Margin=" << Margin <<
"\n");
1535 if (Limit < MaxSetPressure[PSet] + Margin) {
1538 <<
"Rejected the schedule because of too high register pressure\n");
1554unsigned SwingSchedulerDAG::calculateResMII() {
1557 return RM.calculateResMII();
1566unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1567 unsigned RecMII = 0;
1569 for (
NodeSet &Nodes : NodeSets) {
1573 unsigned Delay = Nodes.getLatency();
1574 unsigned Distance = 1;
1577 unsigned CurMII = (Delay + Distance - 1) / Distance;
1578 Nodes.setRecMII(CurMII);
1579 if (CurMII > RecMII)
1590 for (
SUnit &SU : SUnits) {
1593 DepsAdded.
push_back(std::make_pair(&SU, Pred));
1595 for (std::pair<SUnit *, SDep> &
P : DepsAdded) {
1599 SUnit *TargetSU =
D.getSUnit();
1600 unsigned Reg =
D.getReg();
1601 unsigned Lat =
D.getLatency();
1610void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1614 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1617 for (
auto &SI : SUnits[i].Succs) {
1621 int N =
SI.getSUnit()->NodeNum;
1623 auto Dep = OutputDeps.
find(BackEdge);
1624 if (Dep != OutputDeps.
end()) {
1625 BackEdge = Dep->second;
1626 OutputDeps.
erase(Dep);
1628 OutputDeps[
N] = BackEdge;
1632 if (
SI.getSUnit()->isBoundaryNode() ||
SI.isArtificial() ||
1633 (
SI.getKind() ==
SDep::Anti && !
SI.getSUnit()->getInstr()->isPHI()))
1635 int N =
SI.getSUnit()->NodeNum;
1637 AdjK[i].push_back(
N);
1643 for (
auto &PI : SUnits[i].Preds) {
1644 if (!SUnits[i].getInstr()->mayStore() ||
1647 if (PI.getKind() ==
SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1648 int N = PI.getSUnit()->NodeNum;
1650 AdjK[i].push_back(
N);
1657 for (
auto &OD : OutputDeps)
1658 if (!
Added.test(OD.second)) {
1659 AdjK[OD.first].push_back(OD.second);
1660 Added.set(OD.second);
1666bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
1674 for (
auto W : AdjK[V]) {
1675 if (NumPaths > MaxPaths)
1686 if (!Blocked.test(W)) {
1687 if (circuit(W, S, NodeSets, DAG,
1688 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
1696 for (
auto W : AdjK[V]) {
1707void SwingSchedulerDAG::Circuits::unblock(
int U) {
1710 while (!BU.
empty()) {
1712 assert(SI != BU.
end() &&
"Invalid B set.");
1715 if (Blocked.test(
W->NodeNum))
1716 unblock(
W->NodeNum);
1722void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1727 Circuits Cir(
SUnits, Topo);
1729 Cir.createAdjacencyStructure(
this);
1730 for (
int I = 0, E =
SUnits.size();
I != E; ++
I) {
1732 Cir.circuit(
I,
I, NodeSets,
this);
1768 for (
auto &Dep : SU.
Preds) {
1769 SUnit *TmpSU = Dep.getSUnit();
1781 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
1789 for (
auto &Dep : PHISUs[Index]->Succs) {
1793 SUnit *TmpSU = Dep.getSUnit();
1803 if (UseSUs.
size() == 0)
1808 for (
auto *
I : UseSUs) {
1809 for (
auto *Src : SrcSUs) {
1823 if (
D.isArtificial() ||
D.getSUnit()->isBoundaryNode())
1834void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1835 ScheduleInfo.resize(
SUnits.size());
1838 for (
int I : Topo) {
1846 for (
int I : Topo) {
1848 int zeroLatencyDepth = 0;
1852 if (
P.getLatency() == 0)
1857 asap = std::max(asap, (
int)(
getASAP(
pred) +
P.getLatency() -
1860 maxASAP = std::max(maxASAP, asap);
1861 ScheduleInfo[
I].ASAP = asap;
1862 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
1868 int zeroLatencyHeight = 0;
1883 ScheduleInfo[
I].ALAP = alap;
1884 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
1889 I.computeNodeSetInfo(
this);
1892 for (
unsigned i = 0; i <
SUnits.size(); i++) {
1893 dbgs() <<
"\tNode " << i <<
":\n";
1914 if (S && S->count(Pred.
getSUnit()) == 0)
1925 if (S && S->count(Succ.
getSUnit()) == 0)
1931 return !Preds.
empty();
1943 if (S && S->count(Succ.
getSUnit()) == 0)
1953 if (S && S->count(Pred.
getSUnit()) == 0)
1959 return !Succs.
empty();
1974 if (!Visited.
insert(Cur).second)
1975 return Path.contains(Cur);
1976 bool FoundPath =
false;
1977 for (
auto &SI : Cur->
Succs)
1980 computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1981 for (
auto &PI : Cur->
Preds)
1984 computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1999 for (
SUnit *SU : NS) {
2005 if (Reg.isVirtual())
2007 else if (
MRI.isAllocatable(Reg))
2012 for (
SUnit *SU : NS)
2016 if (Reg.isVirtual()) {
2017 if (!
Uses.count(Reg))
2020 }
else if (
MRI.isAllocatable(Reg)) {
2022 if (!
Uses.count(Unit))
2032void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2033 for (
auto &NS : NodeSets) {
2039 RecRPTracker.init(&
MF, &RegClassInfo, &LIS,
BB,
BB->
end(),
false,
true);
2041 RecRPTracker.closeBottom();
2043 std::vector<SUnit *>
SUnits(NS.begin(), NS.end());
2045 return A->NodeNum >
B->NodeNum;
2048 for (
auto &SU :
SUnits) {
2054 RecRPTracker.setPos(std::next(CurInstI));
2058 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2063 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2066 NS.setExceedPressure(SU);
2069 RecRPTracker.recede();
2076void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2077 unsigned Colocate = 0;
2078 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2083 for (
int j = i + 1;
j <
e; ++
j) {
2104void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2109 for (
auto &NS : NodeSets) {
2110 if (NS.getRecMII() > 2)
2112 if (NS.getMaxDepth() > MII)
2121void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2149 NodesAdded.
insert(
I.begin(),
I.end());
2158 addConnectedNodes(
I, NewSet, NodesAdded);
2159 if (!NewSet.
empty())
2160 NodeSets.push_back(NewSet);
2167 addConnectedNodes(
I, NewSet, NodesAdded);
2168 if (!NewSet.
empty())
2169 NodeSets.push_back(NewSet);
2174 if (NodesAdded.
count(&SU) == 0) {
2176 addConnectedNodes(&SU, NewSet, NodesAdded);
2177 if (!NewSet.
empty())
2178 NodeSets.push_back(NewSet);
2184void SwingSchedulerDAG::addConnectedNodes(
SUnit *SU,
NodeSet &NewSet,
2188 for (
auto &SI : SU->
Succs) {
2190 if (!
SI.isArtificial() && !
Successor->isBoundaryNode() &&
2192 addConnectedNodes(
Successor, NewSet, NodesAdded);
2194 for (
auto &PI : SU->
Preds) {
2195 SUnit *Predecessor = PI.getSUnit();
2196 if (!PI.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2197 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2206 for (
SUnit *SU : Set1) {
2207 if (Set2.
count(SU) != 0)
2210 return !Result.empty();
2214void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2223 for (
SUnit *SU : *J)
2235void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2239 J->remove_if([&](
SUnit *SUJ) {
return I->count(SUJ); });
2254void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2258 for (
auto &Nodes : NodeSets) {
2263 R.insert(
N.begin(),
N.end());
2267 R.insert(
N.begin(),
N.end());
2275 }
else if (NodeSets.size() == 1) {
2276 for (
const auto &
N : Nodes)
2277 if (
N->Succs.size() == 0)
2283 SUnit *maxASAP =
nullptr;
2284 for (
SUnit *SU : Nodes) {
2294 while (!
R.empty()) {
2295 if (Order == TopDown) {
2299 while (!
R.empty()) {
2300 SUnit *maxHeight =
nullptr;
2313 NodeOrder.insert(maxHeight);
2315 R.remove(maxHeight);
2316 for (
const auto &
I : maxHeight->
Succs) {
2317 if (Nodes.count(
I.getSUnit()) == 0)
2319 if (NodeOrder.contains(
I.getSUnit()))
2323 R.insert(
I.getSUnit());
2326 for (
const auto &
I : maxHeight->
Preds) {
2329 if (Nodes.count(
I.getSUnit()) == 0)
2331 if (NodeOrder.contains(
I.getSUnit()))
2333 R.insert(
I.getSUnit());
2339 if (
pred_L(NodeOrder,
N, &Nodes))
2340 R.insert(
N.begin(),
N.end());
2345 while (!
R.empty()) {
2346 SUnit *maxDepth =
nullptr;
2358 NodeOrder.insert(maxDepth);
2361 if (Nodes.isExceedSU(maxDepth)) {
2364 R.insert(Nodes.getNode(0));
2367 for (
const auto &
I : maxDepth->
Preds) {
2368 if (Nodes.count(
I.getSUnit()) == 0)
2370 if (NodeOrder.contains(
I.getSUnit()))
2372 R.insert(
I.getSUnit());
2375 for (
const auto &
I : maxDepth->
Succs) {
2378 if (Nodes.count(
I.getSUnit()) == 0)
2380 if (NodeOrder.contains(
I.getSUnit()))
2382 R.insert(
I.getSUnit());
2388 if (
succ_L(NodeOrder,
N, &Nodes))
2389 R.insert(
N.begin(),
N.end());
2396 dbgs() <<
"Node order: ";
2397 for (
SUnit *
I : NodeOrder)
2398 dbgs() <<
" " <<
I->NodeNum <<
" ";
2405bool SwingSchedulerDAG::schedulePipeline(
SMSchedule &Schedule) {
2407 if (NodeOrder.empty()){
2412 bool scheduleFound =
false;
2413 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2417 HRPDetector->init(RegClassInfo);
2420 for (
unsigned II = MII;
II <= MAX_II && !scheduleFound; ++
II) {
2432 int EarlyStart = INT_MIN;
2433 int LateStart = INT_MAX;
2442 dbgs() <<
format(
"\tes: %8x ls: %8x\n", EarlyStart, LateStart));
2444 if (EarlyStart > LateStart)
2445 scheduleFound =
false;
2446 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
2448 Schedule.
insert(SU, EarlyStart, EarlyStart + (
int)
II - 1,
II);
2449 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
2451 Schedule.
insert(SU, LateStart, LateStart - (
int)
II + 1,
II);
2452 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2453 LateStart = std::min(LateStart, EarlyStart + (
int)
II - 1);
2462 scheduleFound = Schedule.
insert(SU, LateStart, EarlyStart,
II);
2464 scheduleFound = Schedule.
insert(SU, EarlyStart, LateStart,
II);
2467 scheduleFound = Schedule.
insert(SU, FirstCycle +
getASAP(SU),
2476 scheduleFound =
false;
2480 dbgs() <<
"\tCan't schedule\n";
2482 }
while (++NI != NE && scheduleFound);
2504 if (scheduleFound) {
2510 if (scheduleFound) {
2512 Pass.ORE->emit([&]() {
2515 <<
"Schedule found with Initiation Interval: "
2517 <<
", MaxStageCount: "
2528bool SwingSchedulerDAG::computeDelta(
MachineInstr &
MI,
unsigned &Delta)
const {
2532 bool OffsetIsScalable;
2537 if (OffsetIsScalable)
2540 if (!BaseOp->
isReg())
2548 if (BaseDef && BaseDef->
isPHI()) {
2572 unsigned &OffsetPos,
2578 unsigned BasePosLd, OffsetPosLd;
2581 Register BaseReg =
MI->getOperand(BasePosLd).getReg();
2586 if (!Phi || !
Phi->isPHI())
2595 if (!PrevDef || PrevDef ==
MI)
2601 unsigned BasePos1 = 0, OffsetPos1 = 0;
2607 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
2617 BasePos = BasePosLd;
2618 OffsetPos = OffsetPosLd;
2630 InstrChanges.find(SU);
2631 if (It != InstrChanges.end()) {
2632 std::pair<unsigned, int64_t> RegAndOffset = It->second;
2633 unsigned BasePos, OffsetPos;
2636 Register BaseReg =
MI->getOperand(BasePos).getReg();
2642 if (BaseStageNum < DefStageNum) {
2644 int OffsetDiff = DefStageNum - BaseStageNum;
2645 if (DefCycleNum < BaseCycleNum) {
2651 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2666 while (Def->isPHI()) {
2667 if (!Visited.
insert(Def).second)
2669 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2670 if (Def->getOperand(i + 1).getMBB() ==
BB) {
2682 bool isSucc)
const {
2697 assert(SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
2711 unsigned DeltaS, DeltaD;
2712 if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2716 int64_t OffsetS, OffsetD;
2717 bool OffsetSIsScalable, OffsetDIsScalable;
2725 assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2726 "Expected offsets to be byte offsets");
2730 if (!DefS || !DefD || !DefS->
isPHI() || !DefD->
isPHI())
2733 unsigned InitValS = 0;
2734 unsigned LoopValS = 0;
2735 unsigned InitValD = 0;
2736 unsigned LoopValD = 0;
2760 if (DeltaS != DeltaD || DeltaS < AccessSizeS.
getValue() ||
2764 return (OffsetS + (int64_t)AccessSizeS.
getValue() <
2765 OffsetD + (int64_t)AccessSizeD.
getValue());
2768void SwingSchedulerDAG::postProcessDAG() {
2769 for (
auto &M : Mutations)
2779 bool forward =
true;
2781 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
2782 << EndCycle <<
" II: " <<
II <<
"\n";
2784 if (StartCycle > EndCycle)
2788 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2789 for (
int curCycle = StartCycle; curCycle != termCycle;
2790 forward ? ++curCycle : --curCycle) {
2795 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
2800 ProcItinResources.reserveResources(*SU, curCycle);
2801 ScheduledInstrs[curCycle].push_back(SU);
2802 InstrToCycle.insert(std::make_pair(SU, curCycle));
2803 if (curCycle > LastCycle)
2804 LastCycle = curCycle;
2805 if (curCycle < FirstCycle)
2806 FirstCycle = curCycle;
2810 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
2822 int EarlyCycle = INT_MAX;
2823 while (!Worklist.
empty()) {
2826 if (Visited.
count(PrevSU))
2828 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2829 if (it == InstrToCycle.end())
2831 EarlyCycle = std::min(EarlyCycle, it->second);
2832 for (
const auto &PI : PrevSU->
Preds)
2845 int LateCycle = INT_MIN;
2846 while (!Worklist.
empty()) {
2851 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2852 if (it == InstrToCycle.end())
2854 LateCycle = std::max(LateCycle, it->second);
2855 for (
const auto &SI : SuccSU->
Succs)
2867 for (
auto &
P : SU->
Preds)
2868 if (DAG->
isBackedge(SU,
P) &&
P.getSUnit()->getInstr()->isPHI())
2869 for (
auto &S :
P.getSUnit()->Succs)
2871 return P.getSUnit();
2882 for (
int cycle =
getFirstCycle(); cycle <= LastCycle; ++cycle) {
2888 for (
unsigned i = 0, e = (
unsigned)SU->
Preds.size(); i != e; ++i) {
2894 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2897 *MinLateStart = std::min(*MinLateStart,
End);
2902 *MinLateStart = std::min(*MinLateStart, LateStart);
2910 *MinLateStart = std::min(*MinLateStart, cycle);
2912 for (
unsigned i = 0, e = (
unsigned)SU->
Succs.size(); i != e; ++i) {
2913 if (SU->
Succs[i].getSUnit() ==
I) {
2918 *MinLateStart = std::min(*MinLateStart, LateStart);
2921 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
2926 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2938 std::deque<SUnit *> &Insts)
const {
2940 bool OrderBeforeUse =
false;
2941 bool OrderAfterDef =
false;
2942 bool OrderBeforeDef =
false;
2943 unsigned MoveDef = 0;
2944 unsigned MoveUse = 0;
2948 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
2951 if (!MO.isReg() || !MO.getReg().isVirtual())
2955 unsigned BasePos, OffsetPos;
2956 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
2957 if (
MI->getOperand(BasePos).getReg() == Reg)
2961 std::tie(Reads,
Writes) =
2962 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2964 OrderBeforeUse =
true;
2969 OrderAfterDef =
true;
2973 OrderBeforeUse =
true;
2977 OrderAfterDef =
true;
2981 OrderBeforeUse =
true;
2985 OrderAfterDef =
true;
2990 OrderBeforeUse =
true;
2996 OrderBeforeDef =
true;
3003 for (
auto &S : SU->
Succs) {
3007 OrderBeforeUse =
true;
3016 OrderBeforeUse =
true;
3017 if ((MoveUse == 0) || (Pos < MoveUse))
3021 for (
auto &
P : SU->
Preds) {
3022 if (
P.getSUnit() != *
I)
3027 OrderAfterDef =
true;
3034 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3035 OrderBeforeUse =
false;
3040 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3044 if (OrderBeforeUse && OrderAfterDef) {
3045 SUnit *UseSU = Insts.at(MoveUse);
3046 SUnit *DefSU = Insts.at(MoveDef);
3047 if (MoveUse > MoveDef) {
3048 Insts.erase(Insts.begin() + MoveUse);
3049 Insts.erase(Insts.begin() + MoveDef);
3051 Insts.erase(Insts.begin() + MoveDef);
3052 Insts.erase(Insts.begin() + MoveUse);
3062 Insts.push_front(SU);
3064 Insts.push_back(SU);
3072 assert(Phi.isPHI() &&
"Expecting a Phi.");
3077 unsigned InitVal = 0;
3078 unsigned LoopVal = 0;
3079 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3087 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3105 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3111 if (DMO.getReg() == LoopReg)
3136 for (
auto &SU : SSD->
SUnits)
3140 while (!Worklist.
empty()) {
3142 if (DoNotPipeline.
count(SU))
3145 DoNotPipeline.
insert(SU);
3146 for (
auto &Dep : SU->
Preds)
3149 for (
auto &Dep : SU->
Succs)
3153 return DoNotPipeline;
3162 int NewLastCycle = INT_MIN;
3167 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3173 for (
auto &Dep : SU.
Preds)
3174 NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
3176 int OldCycle = InstrToCycle[&SU];
3177 if (OldCycle != NewCycle) {
3178 InstrToCycle[&SU] = NewCycle;
3183 <<
") is not pipelined; moving from cycle " << OldCycle
3184 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3186 NewLastCycle = std::max(NewLastCycle, NewCycle);
3188 LastCycle = NewLastCycle;
3205 int CycleDef = InstrToCycle[&SU];
3206 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3207 for (
auto &SI : SU.
Succs)
3208 if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
3212 if (InstrToCycle[SI.getSUnit()] <= CycleDef)
3229void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3232 typedef std::pair<SUnit *, unsigned> UnitIndex;
3233 std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(
nullptr, 0));
3235 for (
unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3236 Indices.push_back(std::make_pair(NodeOrder[i], i));
3238 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3239 return std::get<0>(i1) < std::get<0>(i2);
3252 for (
unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3253 SUnit *SU = NodeOrder[i];
3256 bool PredBefore =
false;
3257 bool SuccBefore =
false;
3266 unsigned PredIndex = std::get<1>(
3282 unsigned SuccIndex = std::get<1>(
3295 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3300 NumNodeOrderIssues++;
3304 <<
" are scheduled before node " << SU->
NodeNum
3311 dbgs() <<
"Invalid node order found!\n";
3322 unsigned OverlapReg = 0;
3323 unsigned NewBaseReg = 0;
3324 for (
SUnit *SU : Instrs) {
3326 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3334 InstrChanges.find(SU);
3335 if (It != InstrChanges.end()) {
3336 unsigned BasePos, OffsetPos;
3342 MI->getOperand(OffsetPos).getImm() - It->second.second;
3355 unsigned TiedUseIdx = 0;
3356 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3358 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3360 NewBaseReg =
MI->getOperand(i).getReg();
3369 const std::deque<SUnit *> &Instrs)
const {
3370 std::deque<SUnit *> NewOrderPhi;
3371 for (
SUnit *SU : Instrs) {
3373 NewOrderPhi.push_back(SU);
3375 std::deque<SUnit *> NewOrderI;
3376 for (
SUnit *SU : Instrs) {
3392 std::deque<SUnit *> &cycleInstrs =
3393 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3395 ScheduledInstrs[cycle].push_front(SU);
3401 for (
int cycle =
getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3402 ScheduledInstrs.erase(cycle);
3412 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3421 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3422 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3423 for (
const auto &
I : Nodes)
3424 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3428#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3435 for (
SUnit *CI : cycleInstrs->second) {
3437 os <<
"(" << CI->
NodeNum <<
") ";
3448void ResourceManager::dumpMRT()
const {
3452 std::stringstream SS;
3454 SS << std::setw(4) <<
"Slot";
3456 SS << std::setw(3) <<
I;
3457 SS << std::setw(7) <<
"#Mops"
3459 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3460 SS << std::setw(4) << Slot;
3462 SS << std::setw(3) << MRT[Slot][
I];
3463 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3472 unsigned ProcResourceID = 0;
3477 "Too many kinds of resources, unsupported");
3483 if (
Desc.SubUnitsIdxBegin)
3485 Masks[
I] = 1ULL << ProcResourceID;
3491 if (!
Desc.SubUnitsIdxBegin)
3493 Masks[
I] = 1ULL << ProcResourceID;
3494 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3495 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3500 dbgs() <<
"ProcResourceDesc:\n";
3503 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3504 ProcResource->
Name,
I, Masks[
I],
3507 dbgs() <<
" -----------------\n";
3515 dbgs() <<
"canReserveResources:\n";
3518 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3524 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3530 reserveResources(SCDesc,
Cycle);
3531 bool Result = !isOverbooked();
3532 unreserveResources(SCDesc,
Cycle);
3541 dbgs() <<
"reserveResources:\n";
3544 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3550 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3556 reserveResources(SCDesc,
Cycle);
3561 dbgs() <<
"reserveResources: done!\n\n";
3572 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3575 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3584 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3587 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3590bool ResourceManager::isOverbooked()
const {
3592 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
3595 if (MRT[Slot][
I] >
Desc->NumUnits)
3598 if (NumScheduledMops[Slot] > IssueWidth)
3604int ResourceManager::calculateResMIIDFA()
const {
3609 FuncUnitSorter FUS = FuncUnitSorter(*ST);
3611 FUS.calcCriticalResources(*SU.
getInstr());
3622 while (!FuncUnitOrder.empty()) {
3624 FuncUnitOrder.pop();
3631 unsigned ReservedCycles = 0;
3632 auto *RI = Resources.
begin();
3633 auto *RE = Resources.
end();
3635 dbgs() <<
"Trying to reserve resource for " << NumCycles
3636 <<
" cycles for \n";
3639 for (
unsigned C = 0;
C < NumCycles; ++
C)
3641 if ((*RI)->canReserveResources(*
MI)) {
3642 (*RI)->reserveResources(*
MI);
3649 <<
", NumCycles:" << NumCycles <<
"\n");
3651 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
3653 <<
"NewResource created to reserve resources"
3656 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
3657 NewResource->reserveResources(*
MI);
3658 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
3662 int Resmii = Resources.
size();
3669 return calculateResMIIDFA();
3688 <<
" WriteProcRes: ";
3699 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
3702 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
3707 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
3710 dbgs() <<
"#Mops: " << NumMops <<
", "
3711 <<
"IssueWidth: " << IssueWidth <<
", "
3712 <<
"Cycles: " << Result <<
"\n";
3717 std::stringstream SS;
3718 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
3719 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
3726 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
3729 std::stringstream SS;
3730 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
3731 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
3732 << std::setw(10) << Cycles <<
"\n";
3736 if (Cycles > Result)
3743 InitiationInterval =
II;
3744 DFAResources.clear();
3745 DFAResources.resize(
II);
3746 for (
auto &
I : DFAResources)
3747 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
3750 NumScheduledMops.
clear();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
SmallVector< uint32_t, 0 > Writes
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Modulo Software Pipelining
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static bool isDependenceBarrier(MachineInstr &MI)
Return true if the instruction causes a chain between memory references before and after it.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
static unsigned getSize(unsigned Kind)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
Implements a dense probed hash-table based set.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
A possibly irreducible generalization of a Loop.
Itinerary data supplied by a subtarget to be used by a target.
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
BlockT * getHeader() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Generic base class for all target subtargets.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
const MachineOperand & getOperand(unsigned i) const
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
static use_instr_iterator use_instr_end()
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
Pass interface - Implemented by all 'passes'.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Track the current register pressure at some position in the instruction stream, and remember the high...
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Kind
These are the different kinds of scheduling dependencies.
@ Output
A register output-dependence (aka WAW).
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, SwingSchedulerDAG *DAG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void dumpNode(const SUnit &SU) const override
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void dump() const override
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
MachineFunction & MF
Machine function.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
typename vector_type::const_iterator iterator
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true) const
Return true for an order or output dependence that is loop carried potentially.
unsigned getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Object returned by analyzeLoopForPipelining.
virtual bool isMVEExpanderSupported()
Return true if the target can expand pipelined schedule with modulo variable expansion.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS)
Return true if the proposed schedule should used.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static Type * getVoidTy(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
The main class in the implementation of the target independent window scheduler.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
@ WS_Force
Use window algorithm after SMS algorithm fails.
@ WS_On
Turn off window algorithm.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
cl::opt< bool > SwpEnableCopyToPhi
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Description of the encoding of one expression Op.
These values represent a non-pipelined step in the execution of an instruction.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
static constexpr LaneBitmask getNone()
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
unsigned getNumProcResourceKinds() const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineInstr * LoopInductionVar
SmallVector< MachineOperand, 4 > BrCond
MachineInstr * LoopCompare
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
const TargetPassConfig * PassConfig
const MachineDominatorTree * MDT
RegisterClassInfo * RegClassInfo
const MachineLoopInfo * MLI
Store the effects of a change in pressure on things that MI scheduler cares about.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.