72#include "llvm/Config/llvm-config.h"
101#define DEBUG_TYPE "pipeliner"
103STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
104STATISTIC(NumPipelined,
"Number of loops software pipelined");
105STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
106STATISTIC(NumFailBranch,
"Pipeliner abort due to unknown branch");
107STATISTIC(NumFailLoop,
"Pipeliner abort due to unsupported loop");
108STATISTIC(NumFailPreheader,
"Pipeliner abort due to missing preheader");
109STATISTIC(NumFailLargeMaxMII,
"Pipeliner abort due to MaxMII too large");
110STATISTIC(NumFailZeroMII,
"Pipeliner abort due to zero MII");
111STATISTIC(NumFailNoSchedule,
"Pipeliner abort due to no schedule found");
112STATISTIC(NumFailZeroStage,
"Pipeliner abort due to zero stage");
113STATISTIC(NumFailLargeMaxStage,
"Pipeliner abort due to too many stages");
114STATISTIC(NumFailTooManyStores,
"Pipeliner abort due to too many stores");
118 cl::desc(
"Enable Software Pipelining"));
127 cl::desc(
"Size limit for the MII."),
133 cl::desc(
"Force pipeliner to use specified II."),
139 cl::desc(
"Maximum stages allowed in the generated scheduled."),
146 cl::desc(
"Prune dependences between unrelated Phi nodes."),
153 cl::desc(
"Prune loop carried order dependences."),
171 cl::desc(
"Instead of emitting the pipelined code, annotate instructions "
172 "with the generated schedule for feeding into the "
173 "-modulo-schedule-test pass"));
178 "Use the experimental peeling code generator for software pipelining"));
186 cl::desc(
"Limit register pressure of scheduled loop"));
191 cl::desc(
"Margin representing the unused percentage of "
192 "the register pressure limit"));
196 cl::desc(
"Use the MVE code generator for software pipelining"));
201 "pipeliner-max-num-stores",
209 cl::desc(
"Enable CopyToPhi DAG Mutation"));
214 "pipeliner-force-issue-width",
221 cl::desc(
"Set how to use window scheduling algorithm."),
223 "Turn off window algorithm."),
225 "Use window algorithm after SMS algorithm fails."),
227 "Use window algorithm instead of SMS algorithm.")));
// Cap on the number of candidate paths Circuits::circuit() explores from a
// single start node while enumerating recurrence cycles (see the
// "NumPaths > MaxPaths" bail-out in circuit()); bounds the search so cycle
// enumeration cannot blow up on dense dependence graphs.
229unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
237 "Modulo Software Pipelining",
false,
false)
279 enum class InstrTag {
288 TaggedSUnit(
SUnit *SU, InstrTag Tag)
291 InstrTag
getTag()
const {
return InstrTag(getInt()); }
296 struct NoBarrierInstsChunk {
301 void append(
SUnit *SU);
306 std::vector<SUnit> &SUnits;
312 std::vector<BitVector> LoopCarried;
325 std::vector<TaggedSUnit> TaggedSUnits;
339 return LoopCarried[Idx];
344 std::optional<InstrTag> getInstrTag(
SUnit *SU)
const;
346 void addLoopCarriedDepenenciesForChunks(
const NoBarrierInstsChunk &From,
347 const NoBarrierInstsChunk &To);
354 void computeDependenciesAux();
356 void setLoopCarriedDep(
const SUnit *Src,
const SUnit *Dst) {
357 LoopCarried[Src->NodeNum].set(Dst->NodeNum);
389 TII =
MF->getSubtarget().getInstrInfo();
392 for (
const auto &L : *
MLI)
402bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
404 for (
const auto &InnerLoop : L)
405 Changed |= scheduleLoop(*InnerLoop);
417 setPragmaPipelineOptions(L);
418 if (!canPipelineLoop(L)) {
422 L.getStartLoc(), L.getHeader())
423 <<
"Failed to pipeline loop";
426 LI.LoopPipelinerInfo.reset();
431 if (useSwingModuloScheduler())
432 Changed = swingModuloScheduler(L);
434 if (useWindowScheduler(
Changed))
435 Changed = runWindowScheduler(L);
437 LI.LoopPipelinerInfo.reset();
441void MachinePipeliner::setPragmaPipelineOptions(
MachineLoop &L) {
446 MachineBasicBlock *LBLK =
L.getTopBlock();
459 MDNode *LoopID = TI->
getMetadata(LLVMContext::MD_loop);
460 if (LoopID ==
nullptr)
477 if (S->
getString() ==
"llvm.loop.pipeline.initiationinterval") {
479 "Pipeline initiation interval hint metadata should have two operands.");
483 }
else if (S->
getString() ==
"llvm.loop.pipeline.disable") {
496 auto It = PhiDeps.find(
Reg);
497 if (It == PhiDeps.end())
508 for (
unsigned Dep : It->second) {
523 unsigned DefReg =
MI.getOperand(0).getReg();
527 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2)
528 Ins->second.push_back(
MI.getOperand(
I).getReg());
535 for (
const auto &KV : PhiDeps) {
536 unsigned Reg = KV.first;
547bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
548 if (
L.getNumBlocks() != 1) {
550 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
551 L.getStartLoc(),
L.getHeader())
552 <<
"Not a single basic block: "
553 <<
ore::NV(
"NumBlocks",
L.getNumBlocks());
565 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
566 L.getStartLoc(),
L.getHeader())
567 <<
"Disabled by Pragma.";
577 if (
TII->analyzeBranch(*
L.getHeader(),
LI.TBB,
LI.FBB,
LI.BrCond)) {
578 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeBranch, can NOT pipeline Loop\n");
581 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
582 L.getStartLoc(),
L.getHeader())
583 <<
"The branch can't be understood";
588 LI.LoopInductionVar =
nullptr;
589 LI.LoopCompare =
nullptr;
590 LI.LoopPipelinerInfo =
TII->analyzeLoopForPipelining(
L.getTopBlock());
591 if (!
LI.LoopPipelinerInfo) {
592 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeLoop, can NOT pipeline Loop\n");
595 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
596 L.getStartLoc(),
L.getHeader())
597 <<
"The loop structure is not supported";
602 if (!
L.getLoopPreheader()) {
603 LLVM_DEBUG(
dbgs() <<
"Preheader not found, can NOT pipeline Loop\n");
606 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
607 L.getStartLoc(),
L.getHeader())
608 <<
"No loop preheader found";
613 unsigned NumStores = 0;
614 for (MachineInstr &
MI : *
L.getHeader())
619 NumFailTooManyStores++;
621 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
622 L.getStartLoc(),
L.getHeader())
623 <<
"Too many store instructions in the loop: "
624 <<
ore::NV(
"NumStores", NumStores) <<
" > "
631 preprocessPhiNodes(*
L.getHeader());
636 MachineRegisterInfo &
MRI =
MF->getRegInfo();
640 for (MachineInstr &PI :
B.phis()) {
641 MachineOperand &DefOp = PI.getOperand(0);
643 auto *RC =
MRI.getRegClass(DefOp.
getReg());
645 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
646 MachineOperand &RegOp = PI.getOperand(i);
653 MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
670bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
671 assert(
L.getBlocks().size() == 1 &&
"SMS works on single blocks only.");
674 SwingSchedulerDAG SMS(
678 MachineBasicBlock *
MBB =
L.getHeader();
696 return SMS.hasNewSchedule();
710bool MachinePipeliner::runWindowScheduler(
MachineLoop &L) {
718 Context.RegClassInfo->runOnMachineFunction(*
MF);
723bool MachinePipeliner::useSwingModuloScheduler() {
728bool MachinePipeliner::useWindowScheduler(
bool Changed) {
735 "llvm.loop.pipeline.initiationinterval is set.\n");
743void SwingSchedulerDAG::setMII(
unsigned ResMII,
unsigned RecMII) {
746 else if (II_setByPragma > 0)
747 MII = II_setByPragma;
749 MII = std::max(ResMII, RecMII);
752void SwingSchedulerDAG::setMAX_II() {
755 else if (II_setByPragma > 0)
756 MAX_II = II_setByPragma;
766 updatePhiDependences();
767 Topo.InitDAGTopologicalSorting();
773 dbgs() <<
"===== Loop Carried Edges Begin =====\n";
776 dbgs() <<
"===== Loop Carried Edges End =====\n";
779 NodeSetType NodeSets;
780 findCircuits(NodeSets);
781 NodeSetType Circuits = NodeSets;
784 unsigned ResMII = calculateResMII();
785 unsigned RecMII = calculateRecMII(NodeSets);
793 setMII(ResMII, RecMII);
797 <<
" (rec=" << RecMII <<
", res=" << ResMII <<
")\n");
803 Pass.ORE->emit([&]() {
805 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
806 <<
"Invalid Minimal Initiation Interval: 0";
814 <<
", we don't pipeline large loops\n");
815 NumFailLargeMaxMII++;
816 Pass.ORE->emit([&]() {
818 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
819 <<
"Minimal Initiation Interval too large: "
820 <<
ore::NV(
"MII", (
int)MII) <<
" > "
822 <<
"Refer to -pipeliner-max-mii.";
827 computeNodeFunctions(NodeSets);
829 registerPressureFilter(NodeSets);
831 colocateNodeSets(NodeSets);
833 checkNodeSets(NodeSets);
836 for (
auto &
I : NodeSets) {
837 dbgs() <<
" Rec NodeSet ";
844 groupRemainingNodes(NodeSets);
846 removeDuplicateNodes(NodeSets);
849 for (
auto &
I : NodeSets) {
850 dbgs() <<
" NodeSet ";
855 computeNodeOrder(NodeSets);
858 checkValidNodeOrder(Circuits);
861 Scheduled = schedulePipeline(Schedule);
866 Pass.ORE->emit([&]() {
868 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
869 <<
"Unable to find schedule";
876 if (numStages == 0) {
879 Pass.ORE->emit([&]() {
881 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
882 <<
"No need to pipeline - no overlapped iterations in schedule.";
889 <<
" : too many stages, abort\n");
890 NumFailLargeMaxStage++;
891 Pass.ORE->emit([&]() {
893 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
894 <<
"Too many stages in schedule: "
895 <<
ore::NV(
"numStages", (
int)numStages) <<
" > "
897 <<
". Refer to -pipeliner-max-stages.";
902 Pass.ORE->emit([&]() {
905 <<
"Pipelined succesfully!";
910 std::vector<MachineInstr *> OrderedInsts;
914 OrderedInsts.push_back(SU->getInstr());
915 Cycles[SU->getInstr()] =
Cycle;
920 for (
auto &KV : NewMIs) {
921 Cycles[KV.first] = Cycles[KV.second];
922 Stages[KV.first] = Stages[KV.second];
923 NewInstrChanges[KV.first] = InstrChanges[
getSUnit(KV.first)];
930 "Cannot serialize a schedule with InstrChanges!");
940 LoopPipelinerInfo->isMVEExpanderSupported() &&
954 for (
auto &KV : NewMIs)
955 MF.deleteMachineInstr(KV.second);
966 assert(Phi.isPHI() &&
"Expecting a Phi.");
970 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
971 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
972 InitVal = Phi.getOperand(i).getReg();
974 LoopVal = Phi.getOperand(i).getReg();
976 assert(InitVal && LoopVal &&
"Unexpected Phi structure.");
982 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
983 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
984 return Phi.getOperand(i).getReg();
993 while (!Worklist.
empty()) {
995 for (
const auto &
SI : SU->
Succs) {
998 if (Visited.
count(SuccSU))
1011 if (!getUnderlyingObjects())
1036bool SUnitWithMemInfo::getUnderlyingObjects() {
1038 if (!
MI->hasOneMemOperand())
1056 const SUnitWithMemInfo &Dst,
1061 if (Src.isTriviallyDisjoint(Dst))
1075 if (Src.isUnknown() || Dst.isUnknown())
1077 if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset)
1088 for (
const Value *SrcObj : Src.UnderlyingObjs)
1089 for (
const Value *DstObj : Dst.UnderlyingObjs)
// Classify \p SU into this chunk's buckets based on its MachineInstr:
// stores go to Stores, loads to Loads, and instructions that may raise a
// floating-point exception to FPExceptions. (The leading mayStore() branch
// appears elided in this excerpt — TODO confirm against the full file.)
1097void LoopCarriedOrderDepsTracker::NoBarrierInstsChunk::append(SUnit *SU) {
1100 Stores.emplace_back(SU);
1101 else if (
MI->mayLoad())
1102 Loads.emplace_back(SU);
1103 else if (
MI->mayRaiseFPException())
1104 FPExceptions.emplace_back(SU);
1112 : DAG(SSD), BAA(BAA), SUnits(DAG->SUnits), N(SUnits.
size()),
1113 LoopCarried(N,
BitVector(N)), TII(TII), TRI(TRI) {}
1117 for (
auto &SU : SUnits) {
1118 auto Tagged = getInstrTag(&SU);
1123 TaggedSUnits.emplace_back(&SU, *Tagged);
1126 computeDependenciesAux();
// Map \p SU's instruction to the InstrTag used when partitioning the loop
// body for loop-carried order-dependence analysis, or std::nullopt when the
// instruction is irrelevant to that analysis.
1129std::optional<LoopCarriedOrderDepsTracker::InstrTag>
1130LoopCarriedOrderDepsTracker::getInstrTag(
SUnit *SU)
const {
// Global memory objects (as reported by TII) act as barriers; in
// computeDependenciesAux() they delimit the no-barrier chunks.
1132 if (
TII->isGlobalMemoryObject(
MI))
1133 return InstrTag::Barrier;
// Any store, or any load that is not a dereferenceable invariant load,
// participates in memory ordering.
1135 if (
MI->mayStore() ||
1136 (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad()))
1137 return InstrTag::LoadOrStore;
// Instructions that may raise FP exceptions get their own tag so they can
// be ordered alongside memory accesses.
1139 if (
MI->mayRaiseFPException())
1140 return InstrTag::FPExceptions;
// Everything else needs no loop-carried order dependence.
1142 return std::nullopt;
// Record a loop-carried order dependence from \p Src to \p Dst, skipping the
// degenerate case where both entries refer to the same instruction.
// NOTE(review): alias-based filtering between the self-check and the
// setLoopCarriedDep call appears elided in this excerpt — TODO confirm.
1145void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
1146 const SUnitWithMemInfo &Src,
const SUnitWithMemInfo &Dst) {
1148 if (Src.SU == Dst.SU)
1152 setLoopCarriedDep(Src.SU, Dst.SU);
1155void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
1156 const NoBarrierInstsChunk &From,
const NoBarrierInstsChunk &To) {
1158 for (
const SUnitWithMemInfo &Src : From.Loads)
1159 for (
const SUnitWithMemInfo &Dst : To.Stores)
1160 addDependenciesBetweenSUs(Src, Dst);
1163 for (
const SUnitWithMemInfo &Src : From.Stores)
1164 for (
const SUnitWithMemInfo &Dst : To.Loads)
1165 addDependenciesBetweenSUs(Src, Dst);
1168 for (
const SUnitWithMemInfo &Src : From.Stores)
1169 for (
const SUnitWithMemInfo &Dst : To.Stores)
1170 addDependenciesBetweenSUs(Src, Dst);
// Core of the loop-carried order-dependence computation: walk the tagged
// instructions in program order, split them into chunks at barrier
// instructions, add dependences within each chunk, then order the
// first/last barriers against the leading/trailing chunks and each other.
1173void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
1175 SUnit *FirstBarrier =
nullptr;
1176 SUnit *LastBarrier =
nullptr;
// Partition pass: a Barrier closes the current chunk and opens a fresh
// (empty) one; loads/stores and FP-exception-raising instructions are
// appended to the current chunk.
1177 for (
const auto &TSU : TaggedSUnits) {
1178 InstrTag
Tag = TSU.getTag();
1179 SUnit *SU = TSU.getPointer();
1181 case InstrTag::Barrier:
1185 Chunks.emplace_back();
1187 case InstrTag::LoadOrStore:
1188 case InstrTag::FPExceptions:
1189 Chunks.back().append(SU);
// Add loop-carried order dependences between accesses of the same chunk.
1197 for (
const NoBarrierInstsChunk &Chunk : Chunks)
1198 addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
1227 assert(LastBarrier &&
"Both barriers should be set.");
// The last barrier must precede (loop-carried) every access in the first
// chunk of the next iteration.
1230 for (
const SUnitWithMemInfo &Dst : Chunks.front().Loads)
1231 setLoopCarriedDep(LastBarrier, Dst.SU);
1232 for (
const SUnitWithMemInfo &Dst : Chunks.front().Stores)
1233 setLoopCarriedDep(LastBarrier, Dst.SU);
1234 for (
const SUnitWithMemInfo &Dst : Chunks.front().FPExceptions)
1235 setLoopCarriedDep(LastBarrier, Dst.SU);
// Every access in the last chunk must precede (loop-carried) the first
// barrier of the next iteration.
1238 for (
const SUnitWithMemInfo &Src : Chunks.back().Loads)
1239 setLoopCarriedDep(Src.SU, FirstBarrier);
1240 for (
const SUnitWithMemInfo &Src : Chunks.back().Stores)
1241 setLoopCarriedDep(Src.SU, FirstBarrier);
1242 for (
const SUnitWithMemInfo &Src : Chunks.back().FPExceptions)
1243 setLoopCarriedDep(Src.SU, FirstBarrier);
// Distinct first/last barriers must themselves be ordered across the
// backedge.
1246 if (FirstBarrier != LastBarrier)
1247 setLoopCarriedDep(LastBarrier, FirstBarrier);
1256LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() {
1257 LoopCarriedEdges LCE;
1261 LCODTracker.computeDependencies();
1262 for (
unsigned I = 0;
I != SUnits.size();
I++)
1263 for (
const int Succ : LCODTracker.getLoopCarried(
I).set_bits())
1276void SwingSchedulerDAG::updatePhiDependences() {
1278 const TargetSubtargetInfo &
ST = MF.getSubtarget<TargetSubtargetInfo>();
1281 for (SUnit &
I : SUnits) {
1286 MachineInstr *
MI =
I.getInstr();
1288 for (
const MachineOperand &MO :
MI->operands()) {
1295 UI =
MRI.use_instr_begin(
Reg),
1296 UE =
MRI.use_instr_end();
1298 MachineInstr *
UseMI = &*UI;
1299 SUnit *SU = getSUnit(
UseMI);
1325 }
else if (MO.isUse()) {
1328 if (
DefMI ==
nullptr)
1330 SUnit *SU = getSUnit(
DefMI);
1335 ST.adjustSchedDependency(SU, 0, &
I, MO.getOperandNo(), Dep,
1342 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
1351 for (
auto &PI :
I.Preds) {
1352 MachineInstr *PMI = PI.getSUnit()->getInstr();
1354 if (
I.getInstr()->isPHI()) {
1363 for (
const SDep &
D : RemoveDeps)
1370void SwingSchedulerDAG::changeDependences() {
1374 for (SUnit &
I : SUnits) {
1375 unsigned BasePos = 0, OffsetPos = 0;
1377 int64_t NewOffset = 0;
1378 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
1383 Register OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
1384 MachineInstr *
DefMI =
MRI.getUniqueVRegDef(OrigBase);
1387 SUnit *DefSU = getSUnit(
DefMI);
1391 MachineInstr *LastMI =
MRI.getUniqueVRegDef(NewBase);
1394 SUnit *LastSU = getSUnit(LastMI);
1398 if (Topo.IsReachable(&
I, LastSU))
1403 for (
const SDep &
P :
I.Preds)
1404 if (
P.getSUnit() == DefSU)
1406 for (
const SDep &
D : Deps) {
1407 Topo.RemovePred(&
I,
D.getSUnit());
1412 for (
auto &
P : LastSU->
Preds)
1415 for (
const SDep &
D : Deps) {
1416 Topo.RemovePred(LastSU,
D.getSUnit());
1423 Topo.AddPred(LastSU, &
I);
1428 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
1439 std::vector<MachineInstr *> &OrderedInsts,
1447 Stage <= LastStage; ++Stage) {
1450 Instrs[
Cycle].push_front(SU);
1457 std::deque<SUnit *> &CycleInstrs = Instrs[
Cycle];
1459 for (
SUnit *SU : CycleInstrs) {
1461 OrderedInsts.push_back(
MI);
1471struct FuncUnitSorter {
1472 const InstrItineraryData *InstrItins;
1473 const MCSubtargetInfo *STI;
1474 DenseMap<InstrStage::FuncUnits, unsigned> Resources;
1476 FuncUnitSorter(
const TargetSubtargetInfo &TSI)
1477 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1482 unsigned minFuncUnits(
const MachineInstr *Inst,
1485 unsigned min = UINT_MAX;
1486 if (InstrItins && !InstrItins->
isEmpty()) {
1487 for (
const InstrStage &IS :
1489 InstrItins->
endStage(SchedClass))) {
1492 if (numAlternatives < min) {
1493 min = numAlternatives;
1500 const MCSchedClassDesc *SCDesc =
1507 for (
const MCWriteProcResEntry &PRE :
1510 if (!PRE.ReleaseAtCycle)
1512 const MCProcResourceDesc *ProcResource =
1514 unsigned NumUnits = ProcResource->
NumUnits;
1515 if (NumUnits < min) {
1517 F = PRE.ProcResourceIdx;
1522 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1530 void calcCriticalResources(MachineInstr &
MI) {
1531 unsigned SchedClass =
MI.getDesc().getSchedClass();
1532 if (InstrItins && !InstrItins->
isEmpty()) {
1533 for (
const InstrStage &IS :
1535 InstrItins->
endStage(SchedClass))) {
1538 Resources[FuncUnits]++;
1543 const MCSchedClassDesc *SCDesc =
1550 for (
const MCWriteProcResEntry &PRE :
1553 if (!PRE.ReleaseAtCycle)
1555 Resources[PRE.ProcResourceIdx]++;
1559 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1563 bool operator()(
const MachineInstr *IS1,
const MachineInstr *IS2)
const {
1565 unsigned MFUs1 = minFuncUnits(IS1, F1);
1566 unsigned MFUs2 = minFuncUnits(IS2, F2);
1569 return MFUs1 > MFUs2;
1574class HighRegisterPressureDetector {
1575 MachineBasicBlock *OrigMBB;
1576 const MachineRegisterInfo &
MRI;
1577 const TargetRegisterInfo *
TRI;
1579 const unsigned PSetNum;
1585 std::vector<unsigned> InitSetPressure;
1589 std::vector<unsigned> PressureSetLimit;
1591 DenseMap<MachineInstr *, RegisterOperands> ROMap;
1593 using Instr2LastUsesTy = DenseMap<MachineInstr *, SmallDenseSet<Register, 4>>;
1596 using OrderedInstsTy = std::vector<MachineInstr *>;
1597 using Instr2StageTy = DenseMap<MachineInstr *, unsigned>;
1600 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1601 if (Pressures.size() == 0) {
1605 for (
unsigned P : Pressures) {
1616 VirtRegOrUnit VRegOrUnit =
1618 : VirtRegOrUnit(static_cast<MCRegUnit>(
Reg.id()));
1619 for (
auto PSetIter =
MRI.getPressureSets(VRegOrUnit); PSetIter.isValid();
1621 dbgs() << *PSetIter <<
' ';
1626 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1629 VirtRegOrUnit VRegOrUnit =
1631 : VirtRegOrUnit(static_cast<MCRegUnit>(
Reg.id()));
1632 auto PSetIter =
MRI.getPressureSets(VRegOrUnit);
1633 unsigned Weight = PSetIter.getWeight();
1634 for (; PSetIter.isValid(); ++PSetIter)
1635 Pressure[*PSetIter] += Weight;
1638 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1640 auto PSetIter =
MRI.getPressureSets(VirtRegOrUnit(
Reg));
1641 unsigned Weight = PSetIter.getWeight();
1642 for (; PSetIter.isValid(); ++PSetIter) {
1643 auto &
P = Pressure[*PSetIter];
1645 "register pressure must be greater than or equal weight");
1667 void computeLiveIn() {
1668 DenseSet<Register>
Used;
1669 for (
auto &
MI : *OrigMBB) {
1670 if (
MI.isDebugInstr())
1672 for (
auto &Use : ROMap[&
MI].
Uses) {
1675 Use.VRegOrUnit.isVirtualReg()
1676 ?
Use.VRegOrUnit.asVirtualReg()
1677 :
Register(
static_cast<unsigned>(
Use.VRegOrUnit.asMCRegUnit()));
1682 if (isReservedRegister(
Reg))
1684 if (isDefinedInThisLoop(
Reg))
1690 for (
auto LiveIn : Used)
1691 increaseRegisterPressure(InitSetPressure, LiveIn);
1695 void computePressureSetLimit(
const RegisterClassInfo &RCI) {
1696 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1711 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1712 Instr2StageTy &Stages)
const {
1717 DenseSet<Register> TargetRegs;
1718 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1719 if (isDefinedInThisLoop(
Reg))
1722 for (MachineInstr *
MI : OrderedInsts) {
1725 UpdateTargetRegs(
Reg);
1727 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses) {
1730 ?
Use.VRegOrUnit.asVirtualReg()
1732 Use.VRegOrUnit.asMCRegUnit()));
1733 UpdateTargetRegs(
Reg);
1738 const auto InstrScore = [&Stages](MachineInstr *
MI) {
1739 return Stages[
MI] +
MI->isPHI();
1742 DenseMap<Register, MachineInstr *> LastUseMI;
1744 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses) {
1747 Use.VRegOrUnit.isVirtualReg()
1748 ?
Use.VRegOrUnit.asVirtualReg()
1749 :
Register(
static_cast<unsigned>(
Use.VRegOrUnit.asMCRegUnit()));
1754 MachineInstr *Orig = Ite->second;
1755 MachineInstr *
New =
MI;
1756 if (InstrScore(Orig) < InstrScore(New))
1762 Instr2LastUsesTy LastUses;
1763 for (
auto [
Reg,
MI] : LastUseMI)
1764 LastUses[
MI].insert(
Reg);
1780 std::vector<unsigned>
1781 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1782 Instr2StageTy &Stages,
1783 const unsigned StageCount)
const {
1784 using RegSetTy = SmallDenseSet<Register, 16>;
1790 auto CurSetPressure = InitSetPressure;
1791 auto MaxSetPressure = InitSetPressure;
1792 auto LastUses = computeLastUses(OrderedInsts, Stages);
1795 dbgs() <<
"Ordered instructions:\n";
1796 for (MachineInstr *
MI : OrderedInsts) {
1797 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1802 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1803 VirtRegOrUnit VRegOrUnit) {
1817 increaseRegisterPressure(CurSetPressure,
Reg);
1821 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1827 if (!RegSet.contains(
Reg))
1832 decreaseRegisterPressure(CurSetPressure,
Reg);
1836 for (
unsigned I = 0;
I < StageCount;
I++) {
1837 for (MachineInstr *
MI : OrderedInsts) {
1838 const auto Stage = Stages[
MI];
1842 const unsigned Iter =
I - Stage;
1844 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1845 InsertReg(LiveRegSets[Iter],
Def.VRegOrUnit);
1847 for (
auto LastUse : LastUses[
MI]) {
1850 EraseReg(LiveRegSets[Iter - 1], LastUse);
1852 EraseReg(LiveRegSets[Iter], LastUse);
1856 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1857 MaxSetPressure[PSet] =
1858 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1861 dbgs() <<
"CurSetPressure=";
1862 dumpRegisterPressures(CurSetPressure);
1863 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1869 return MaxSetPressure;
1873 HighRegisterPressureDetector(MachineBasicBlock *OrigMBB,
1874 const MachineFunction &MF)
1875 : OrigMBB(OrigMBB),
MRI(MF.getRegInfo()),
1876 TRI(MF.getSubtarget().getRegisterInfo()),
1877 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1878 PressureSetLimit(PSetNum, 0) {}
1882 void init(
const RegisterClassInfo &RCI) {
1883 for (MachineInstr &
MI : *OrigMBB) {
1884 if (
MI.isDebugInstr())
1886 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1890 computePressureSetLimit(RCI);
1895 bool detect(
const SwingSchedulerDAG *SSD, SMSchedule &Schedule,
1896 const unsigned MaxStage)
const {
1898 "the percentage of the margin must be between 0 to 100");
1900 OrderedInstsTy OrderedInsts;
1901 Instr2StageTy Stages;
1903 const auto MaxSetPressure =
1904 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1907 dbgs() <<
"Dump MaxSetPressure:\n";
1908 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1909 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1914 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1915 unsigned Limit = PressureSetLimit[PSet];
1918 <<
" Margin=" << Margin <<
"\n");
1919 if (Limit < MaxSetPressure[PSet] + Margin) {
1922 <<
"Rejected the schedule because of too high register pressure\n");
// Compute the resource-constrained minimum initiation interval (ResMII) by
// delegating to a ResourceManager constructed for this subtarget and DAG.
1938unsigned SwingSchedulerDAG::calculateResMII() {
1940 ResourceManager
RM(&MF.getSubtarget(),
this);
1941 return RM.calculateResMII();
// Compute the recurrence-constrained minimum initiation interval (RecMII):
// each recurrence NodeSet contributes CurMII = ceil(Delay / Distance)
// (computed as (Delay + Distance - 1) / Distance), stored on the NodeSet via
// setRecMII(); the overall RecMII tracks the maximum (the update of RecMII
// is elided in this excerpt — TODO confirm).
1950unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1951 unsigned RecMII = 0;
1953 for (NodeSet &Nodes : NodeSets) {
1957 unsigned Delay = Nodes.getLatency();
// Distance is fixed to 1 here; Delay comes from the NodeSet's latency.
1958 unsigned Distance = 1;
1961 unsigned CurMII = (Delay + Distance - 1) / Distance;
1962 Nodes.setRecMII(CurMII);
1963 if (CurMII > RecMII)
1971void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1972 SwingSchedulerDAG *DAG) {
1973 BitVector
Added(SUnits.size());
1974 DenseMap<int, int> OutputDeps;
1975 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1978 for (
auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
1981 if (OE.isOutputDep()) {
1982 int N = OE.getDst()->NodeNum;
1984 auto Dep = OutputDeps.
find(BackEdge);
1985 if (Dep != OutputDeps.
end()) {
1986 BackEdge = Dep->second;
1987 OutputDeps.
erase(Dep);
1989 OutputDeps[
N] = BackEdge;
1992 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
2004 int N = OE.getDst()->NodeNum;
2006 AdjK[i].push_back(
N);
2012 for (
auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
2013 SUnit *Src =
IE.getSrc();
2014 SUnit *Dst =
IE.getDst();
2017 if (
IE.isOrderDep() && Src->getInstr()->mayLoad()) {
2018 int N = Src->NodeNum;
2020 AdjK[i].push_back(
N);
2027 for (
auto &OD : OutputDeps)
2028 if (!
Added.test(OD.second)) {
2029 AdjK[OD.first].push_back(OD.second);
2030 Added.set(OD.second);
2036bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
2037 const SwingSchedulerDAG *DAG,
2039 SUnit *SV = &SUnits[
V];
2044 for (
auto W : AdjK[V]) {
2045 if (NumPaths > MaxPaths)
2056 if (!Blocked.test(W)) {
2057 if (circuit(W, S, NodeSets, DAG,
2058 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
2066 for (
auto W : AdjK[V]) {
2077void SwingSchedulerDAG::Circuits::unblock(
int U) {
2079 SmallPtrSet<SUnit *, 4> &BU =
B[
U];
2080 while (!BU.
empty()) {
2081 SmallPtrSet<SUnit *, 4>::iterator
SI = BU.
begin();
2082 assert(SI != BU.
end() &&
"Invalid B set.");
2085 if (Blocked.test(
W->NodeNum))
2086 unblock(
W->NodeNum);
2092void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
2093 Circuits Cir(SUnits, Topo);
2095 Cir.createAdjacencyStructure(
this);
2096 for (
int I = 0,
E = SUnits.size();
I !=
E; ++
I) {
2098 Cir.circuit(
I,
I, NodeSets,
this);
2120void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
2121 for (SUnit &SU : DAG->
SUnits) {
2131 for (
auto &Dep : SU.
Preds) {
2132 SUnit *TmpSU = Dep.getSUnit();
2133 MachineInstr *TmpMI = TmpSU->
getInstr();
2144 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
2152 for (
auto &Dep : PHISUs[Index]->Succs) {
2156 SUnit *TmpSU = Dep.getSUnit();
2157 MachineInstr *TmpMI = TmpSU->
getInstr();
2166 if (UseSUs.
size() == 0)
2171 for (
auto *
I : UseSUs) {
2172 for (
auto *Src : SrcSUs) {
2188void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
2189 ScheduleInfo.resize(SUnits.size());
2192 for (
int I : Topo) {
2193 const SUnit &SU = SUnits[
I];
2200 for (
int I : Topo) {
2202 int zeroLatencyDepth = 0;
2203 SUnit *SU = &SUnits[
I];
2204 for (
const auto &IE : DDG->getInEdges(SU)) {
2205 SUnit *Pred =
IE.getSrc();
2206 if (
IE.getLatency() == 0)
2208 std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1);
2209 if (
IE.ignoreDependence(
true))
2211 asap = std::max(asap, (
int)(getASAP(Pred) +
IE.getLatency() -
2212 IE.getDistance() * MII));
2214 maxASAP = std::max(maxASAP, asap);
2215 ScheduleInfo[
I].ASAP = asap;
2216 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
2222 int zeroLatencyHeight = 0;
2223 SUnit *SU = &SUnits[
I];
2224 for (
const auto &OE : DDG->getOutEdges(SU)) {
2225 SUnit *Succ = OE.getDst();
2228 if (OE.getLatency() == 0)
2230 std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1);
2231 if (OE.ignoreDependence(
true))
2233 alap = std::min(alap, (
int)(getALAP(Succ) - OE.getLatency() +
2234 OE.getDistance() * MII));
2237 ScheduleInfo[
I].ALAP = alap;
2238 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
2242 for (NodeSet &
I : NodeSets)
2243 I.computeNodeSetInfo(
this);
2246 for (
unsigned i = 0; i < SUnits.size(); i++) {
2247 dbgs() <<
"\tNode " << i <<
":\n";
2248 dbgs() <<
"\t ASAP = " << getASAP(&SUnits[i]) <<
"\n";
2249 dbgs() <<
"\t ALAP = " << getALAP(&SUnits[i]) <<
"\n";
2250 dbgs() <<
"\t MOV = " << getMOV(&SUnits[i]) <<
"\n";
2251 dbgs() <<
"\t D = " << getDepth(&SUnits[i]) <<
"\n";
2252 dbgs() <<
"\t H = " << getHeight(&SUnits[i]) <<
"\n";
2253 dbgs() <<
"\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) <<
"\n";
2254 dbgs() <<
"\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) <<
"\n";
2269 SUnit *PredSU = IE.getSrc();
2270 if (S && S->count(PredSU) == 0)
2272 if (IE.ignoreDependence(
true))
2283 SUnit *SuccSU = OE.getDst();
2284 if (!OE.isAntiDep())
2286 if (S && S->count(SuccSU) == 0)
2292 return !Preds.
empty();
2305 SUnit *SuccSU = OE.getDst();
2306 if (S && S->count(SuccSU) == 0)
2308 if (OE.ignoreDependence(
false))
2319 SUnit *PredSU = IE.getSrc();
2320 if (!IE.isAntiDep())
2322 if (S && S->count(PredSU) == 0)
2328 return !Succs.
empty();
2344 if (!Visited.
insert(Cur).second)
2345 return Path.contains(Cur);
2346 bool FoundPath =
false;
2348 if (!OE.ignoreDependence(
false))
2350 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
2352 if (IE.isAntiDep() && IE.getDistance() == 0)
2354 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
2369 for (
SUnit *SU : NS) {
2375 if (
Reg.isVirtual())
2377 else if (
MRI.isAllocatable(
Reg))
2378 for (MCRegUnit Unit :
TRI->regunits(
Reg.asMCReg()))
2382 for (
SUnit *SU : NS)
2386 if (
Reg.isVirtual()) {
2390 }
else if (
MRI.isAllocatable(
Reg)) {
2391 for (MCRegUnit Unit :
TRI->regunits(
Reg.asMCReg()))
2402void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2403 for (
auto &NS : NodeSets) {
2407 IntervalPressure RecRegPressure;
2408 RegPressureTracker RecRPTracker(RecRegPressure);
2409 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(),
false,
true);
2411 RecRPTracker.closeBottom();
2413 std::vector<SUnit *> SUnits(NS.begin(), NS.end());
2414 llvm::sort(SUnits, [](
const SUnit *
A,
const SUnit *
B) {
2415 return A->NodeNum >
B->NodeNum;
2418 for (
auto &SU : SUnits) {
2424 RecRPTracker.setPos(std::next(CurInstI));
2426 RegPressureDelta RPDelta;
2428 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2433 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2436 NS.setExceedPressure(SU);
2439 RecRPTracker.recede();
2446void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2447 unsigned Colocate = 0;
2448 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2450 SmallSetVector<SUnit *, 8>
S1;
2453 for (
int j = i + 1;
j <
e; ++
j) {
2457 SmallSetVector<SUnit *, 8> S2;
2474void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2479 for (
auto &NS : NodeSets) {
2480 if (NS.getRecMII() > 2)
2482 if (NS.getMaxDepth() > MII)
2491void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2492 SetVector<SUnit *> NodesAdded;
2493 SmallPtrSet<SUnit *, 8> Visited;
2496 for (NodeSet &
I : NodeSets) {
2497 SmallSetVector<SUnit *, 8>
N;
2500 SetVector<SUnit *>
Path;
2501 for (SUnit *NI :
N) {
2503 computePath(NI, Path, NodesAdded,
I, Visited, DDG.get());
2510 if (
succ_L(NodesAdded,
N, DDG.get())) {
2511 SetVector<SUnit *>
Path;
2512 for (SUnit *NI :
N) {
2514 computePath(NI, Path,
I, NodesAdded, Visited, DDG.get());
2525 SmallSetVector<SUnit *, 8>
N;
2526 if (
succ_L(NodesAdded,
N, DDG.get()))
2528 addConnectedNodes(
I, NewSet, NodesAdded);
2529 if (!NewSet.
empty())
2530 NodeSets.push_back(NewSet);
2535 if (
pred_L(NodesAdded,
N, DDG.get()))
2537 addConnectedNodes(
I, NewSet, NodesAdded);
2538 if (!NewSet.
empty())
2539 NodeSets.push_back(NewSet);
2543 for (SUnit &SU : SUnits) {
2544 if (NodesAdded.
count(&SU) == 0) {
2546 addConnectedNodes(&SU, NewSet, NodesAdded);
2547 if (!NewSet.
empty())
2548 NodeSets.push_back(NewSet);
2554void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2555 SetVector<SUnit *> &NodesAdded) {
2558 for (
auto &OE : DDG->getOutEdges(SU)) {
2560 if (!OE.isArtificial() && !
Successor->isBoundaryNode() &&
2562 addConnectedNodes(
Successor, NewSet, NodesAdded);
2564 for (
auto &IE : DDG->getInEdges(SU)) {
2565 SUnit *Predecessor =
IE.getSrc();
2566 if (!
IE.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2567 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2576 for (
SUnit *SU : Set1) {
2577 if (Set2.
count(SU) != 0)
2580 return !Result.empty();
2584void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2585 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2588 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2593 for (SUnit *SU : *J)
2605void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2606 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2608 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2609 J->remove_if([&](SUnit *SUJ) {
return I->count(SUJ); });
2624void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2625 SmallSetVector<SUnit *, 8>
R;
2628 for (
auto &Nodes : NodeSets) {
2631 SmallSetVector<SUnit *, 8>
N;
2646 }
else if (NodeSets.size() == 1) {
2647 for (
const auto &
N : Nodes)
2648 if (
N->Succs.size() == 0)
2654 SUnit *maxASAP =
nullptr;
2655 for (SUnit *SU : Nodes) {
2656 if (maxASAP ==
nullptr || getASAP(SU) > getASAP(maxASAP) ||
2657 (getASAP(SU) == getASAP(maxASAP) && SU->
NodeNum > maxASAP->
NodeNum))
2665 while (!
R.empty()) {
2666 if (Order == TopDown) {
2670 while (!
R.empty()) {
2671 SUnit *maxHeight =
nullptr;
2672 for (SUnit *
I : R) {
2673 if (maxHeight ==
nullptr || getHeight(
I) > getHeight(maxHeight))
2675 else if (getHeight(
I) == getHeight(maxHeight) &&
2676 getZeroLatencyHeight(
I) > getZeroLatencyHeight(maxHeight))
2678 else if (getHeight(
I) == getHeight(maxHeight) &&
2679 getZeroLatencyHeight(
I) ==
2680 getZeroLatencyHeight(maxHeight) &&
2681 getMOV(
I) < getMOV(maxHeight))
2686 R.remove(maxHeight);
2687 for (
const auto &OE : DDG->getOutEdges(maxHeight)) {
2688 SUnit *SU = OE.getDst();
2689 if (Nodes.count(SU) == 0)
2693 if (OE.ignoreDependence(
false))
2702 for (
const auto &IE : DDG->getInEdges(maxHeight)) {
2703 SUnit *SU =
IE.getSrc();
2704 if (!
IE.isAntiDep())
2706 if (Nodes.count(SU) == 0)
2715 SmallSetVector<SUnit *, 8>
N;
2722 while (!
R.empty()) {
2723 SUnit *maxDepth =
nullptr;
2724 for (SUnit *
I : R) {
2725 if (maxDepth ==
nullptr || getDepth(
I) > getDepth(maxDepth))
2727 else if (getDepth(
I) == getDepth(maxDepth) &&
2728 getZeroLatencyDepth(
I) > getZeroLatencyDepth(maxDepth))
2730 else if (getDepth(
I) == getDepth(maxDepth) &&
2731 getZeroLatencyDepth(
I) == getZeroLatencyDepth(maxDepth) &&
2732 getMOV(
I) < getMOV(maxDepth))
2738 if (Nodes.isExceedSU(maxDepth)) {
2741 R.insert(Nodes.getNode(0));
2744 for (
const auto &IE : DDG->getInEdges(maxDepth)) {
2745 SUnit *SU =
IE.getSrc();
2746 if (Nodes.count(SU) == 0)
2757 for (
const auto &OE : DDG->getOutEdges(maxDepth)) {
2758 SUnit *SU = OE.getDst();
2759 if (!OE.isAntiDep())
2761 if (Nodes.count(SU) == 0)
2770 SmallSetVector<SUnit *, 8>
N;
2779 dbgs() <<
"Node order: ";
2781 dbgs() <<
" " <<
I->NodeNum <<
" ";
2788bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2795 bool scheduleFound =
false;
2796 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2799 std::make_unique<HighRegisterPressureDetector>(Loop.getHeader(), MF);
2800 HRPDetector->init(RegClassInfo);
2803 for (
unsigned II = MII;
II <= MAX_II && !scheduleFound; ++
II) {
2815 int EarlyStart = INT_MIN;
2816 int LateStart = INT_MAX;
2825 dbgs() <<
format(
"\tes: %8x ls: %8x\n", EarlyStart, LateStart));
2827 if (EarlyStart > LateStart)
2828 scheduleFound =
false;
2829 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
2831 Schedule.
insert(SU, EarlyStart, EarlyStart + (
int)
II - 1,
II);
2832 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
2834 Schedule.
insert(SU, LateStart, LateStart - (
int)
II + 1,
II);
2835 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2836 LateStart = std::min(LateStart, EarlyStart + (
int)
II - 1);
2845 scheduleFound = Schedule.
insert(SU, LateStart, EarlyStart,
II);
2847 scheduleFound = Schedule.
insert(SU, EarlyStart, LateStart,
II);
2850 scheduleFound = Schedule.
insert(SU, FirstCycle + getASAP(SU),
2851 FirstCycle + getASAP(SU) +
II - 1,
II);
2859 scheduleFound =
false;
2863 dbgs() <<
"\tCan't schedule\n";
2865 }
while (++NI != NE && scheduleFound);
2870 scheduleFound = DDG->isValidSchedule(Schedule);
2892 if (scheduleFound) {
2893 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*
this, Schedule);
2898 if (scheduleFound) {
2900 Pass.ORE->emit([&]() {
2901 return MachineOptimizationRemarkAnalysis(
2902 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
2903 <<
"Schedule found with Initiation Interval: "
2905 <<
", MaxStageCount: "
2919 if (!
Reg.isVirtual())
2921 if (
MRI.getVRegDef(
Reg)->getParent() !=
MI.getParent())
2933 if (!
Op.isReg() || !
Op.getReg().isVirtual())
2961 if (Def->getParent() != LoopBB)
2964 if (Def->isCopy()) {
2966 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
2968 CurReg = Def->getOperand(1).getReg();
2969 }
else if (Def->isPHI()) {
2975 }
else if (
TII->getIncrementValue(*Def,
Value)) {
2983 bool OffsetIsScalable;
2984 if (
TII->getMemOperandWithOffset(*Def, BaseOp,
Offset, OffsetIsScalable,
2987 CurReg = BaseOp->
getReg();
2999 if (CurReg == OrgReg)
3011bool SwingSchedulerDAG::computeDelta(
const MachineInstr &
MI,
int &Delta)
const {
3012 const TargetRegisterInfo *
TRI = MF.getSubtarget().getRegisterInfo();
3013 const MachineOperand *BaseOp;
3015 bool OffsetIsScalable;
3016 if (!
TII->getMemOperandWithOffset(
MI, BaseOp,
Offset, OffsetIsScalable,
TRI))
3020 if (OffsetIsScalable)
3023 if (!BaseOp->
isReg())
3036bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *
MI,
3038 unsigned &OffsetPos,
3044 unsigned BasePosLd, OffsetPosLd;
3050 MachineRegisterInfo &
MRI =
MI->getMF()->getRegInfo();
3051 MachineInstr *
Phi =
MRI.getVRegDef(BaseReg);
3052 if (!Phi || !
Phi->isPHI())
3060 MachineInstr *PrevDef =
MRI.getVRegDef(PrevReg);
3061 if (!PrevDef || PrevDef ==
MI)
3067 unsigned BasePos1 = 0, OffsetPos1 = 0;
3073 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
3075 MachineInstr *NewMI = MF.CloneMachineInstr(
MI);
3078 MF.deleteMachineInstr(NewMI);
3083 BasePos = BasePosLd;
3084 OffsetPos = OffsetPosLd;
3096 InstrChanges.find(SU);
3097 if (It != InstrChanges.
end()) {
3098 std::pair<Register, int64_t> RegAndOffset = It->second;
3099 unsigned BasePos, OffsetPos;
3100 if (!
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
3102 Register BaseReg =
MI->getOperand(BasePos).getReg();
3108 if (BaseStageNum < DefStageNum) {
3110 int OffsetDiff = DefStageNum - BaseStageNum;
3111 if (DefCycleNum < BaseCycleNum) {
3117 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3132 while (Def->isPHI()) {
3133 if (!Visited.
insert(Def).second)
3135 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3136 if (Def->getOperand(i + 1).getMBB() == BB) {
3137 Def =
MRI.getVRegDef(Def->getOperand(i).getReg());
3148 int DeltaB, DeltaO, Delta;
3155 int64_t OffsetB, OffsetO;
3156 bool OffsetBIsScalable, OffsetOIsScalable;
3158 if (!
TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,
3159 OffsetBIsScalable,
TRI) ||
3160 !
TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,
3161 OffsetOIsScalable,
TRI))
3164 if (OffsetBIsScalable || OffsetOIsScalable)
3174 if (!RegB.
isVirtual() || !RegO.isVirtual())
3179 if (!DefB || !DefO || !DefB->
isPHI() || !DefO->
isPHI())
3204 dbgs() <<
"Overlap check:\n";
3205 dbgs() <<
" BaseMI: ";
3207 dbgs() <<
" Base + " << OffsetB <<
" + I * " << Delta
3208 <<
", Len: " << AccessSizeB.
getValue() <<
"\n";
3209 dbgs() <<
" OtherMI: ";
3211 dbgs() <<
" Base + " << OffsetO <<
" + I * " << Delta
3212 <<
", Len: " << AccessSizeO.
getValue() <<
"\n";
3220 int64_t BaseMinAddr = OffsetB;
3221 int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.
getValue() - 1;
3222 if (BaseMinAddr > OhterNextIterMaxAddr) {
3227 int64_t BaseMaxAddr = OffsetB + AccessSizeB.
getValue() - 1;
3228 int64_t OtherNextIterMinAddr = OffsetO + Delta;
3229 if (BaseMaxAddr < OtherNextIterMinAddr) {
3243 if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() ||
3244 Edge.getDst()->isBoundaryNode())
3250 if (Edge.isOutputDep())
3255 assert(
SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
3272void SwingSchedulerDAG::postProcessDAG() {
3273 for (
auto &M : Mutations)
3283 bool forward =
true;
3285 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
3286 << EndCycle <<
" II: " <<
II <<
"\n";
3288 if (StartCycle > EndCycle)
3292 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3293 for (
int curCycle = StartCycle; curCycle != termCycle;
3294 forward ? ++curCycle : --curCycle) {
3297 ProcItinResources.canReserveResources(*SU, curCycle)) {
3299 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
3304 ProcItinResources.reserveResources(*SU, curCycle);
3305 ScheduledInstrs[curCycle].push_back(SU);
3306 InstrToCycle.insert(std::make_pair(SU, curCycle));
3307 if (curCycle > LastCycle)
3308 LastCycle = curCycle;
3309 if (curCycle < FirstCycle)
3310 FirstCycle = curCycle;
3314 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
3325 for (
auto &
P : SU->
Preds)
3326 if (
P.getKind() ==
SDep::Anti &&
P.getSUnit()->getInstr()->isPHI())
3327 for (
auto &S :
P.getSUnit()->Succs)
3328 if (S.getKind() ==
SDep::Data && S.getSUnit()->getInstr()->isPHI())
3329 return P.getSUnit();
3342 for (
int cycle =
getFirstCycle(); cycle <= LastCycle; ++cycle) {
3345 if (IE.getSrc() ==
I) {
3346 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() *
II;
3347 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3352 if (OE.getDst() ==
I) {
3353 int LateStart = cycle - OE.getLatency() + OE.getDistance() *
II;
3354 *MinLateStart = std::min(*MinLateStart, LateStart);
3359 for (
const auto &Dep : SU->
Preds) {
3362 if (BE && Dep.getSUnit() == BE && !SU->
getInstr()->
isPHI() &&
3364 *MinLateStart = std::min(*MinLateStart, cycle);
3374 std::deque<SUnit *> &Insts)
const {
3376 bool OrderBeforeUse =
false;
3377 bool OrderAfterDef =
false;
3378 bool OrderBeforeDef =
false;
3379 unsigned MoveDef = 0;
3380 unsigned MoveUse = 0;
3385 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
3388 if (!MO.isReg() || !MO.getReg().isVirtual())
3392 unsigned BasePos, OffsetPos;
3393 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
3394 if (
MI->getOperand(BasePos).getReg() == Reg)
3398 std::tie(Reads, Writes) =
3399 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3401 OrderBeforeUse =
true;
3406 OrderAfterDef =
true;
3408 }
else if (MO.isUse() && Writes &&
stageScheduled(*
I) == StageInst1) {
3410 OrderBeforeUse =
true;
3414 OrderAfterDef =
true;
3418 OrderBeforeUse =
true;
3422 OrderAfterDef =
true;
3427 OrderBeforeUse =
true;
3433 OrderBeforeDef =
true;
3441 if (OE.getDst() != *
I)
3444 OrderBeforeUse =
true;
3451 else if ((OE.isAntiDep() || OE.isOutputDep()) &&
3453 OrderBeforeUse =
true;
3454 if ((MoveUse == 0) || (Pos < MoveUse))
3459 if (IE.getSrc() != *
I)
3461 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
3463 OrderAfterDef =
true;
3470 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3471 OrderBeforeUse =
false;
3476 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3480 if (OrderBeforeUse && OrderAfterDef) {
3481 SUnit *UseSU = Insts.at(MoveUse);
3482 SUnit *DefSU = Insts.at(MoveDef);
3483 if (MoveUse > MoveDef) {
3484 Insts.erase(Insts.begin() + MoveUse);
3485 Insts.erase(Insts.begin() + MoveDef);
3487 Insts.erase(Insts.begin() + MoveDef);
3488 Insts.erase(Insts.begin() + MoveUse);
3498 Insts.push_front(SU);
3500 Insts.push_back(SU);
3508 assert(Phi.isPHI() &&
"Expecting a Phi.");
3515 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3523 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3541 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3547 if (DMO.getReg() == LoopReg)
3558 if (InstrToCycle.count(IE.getSrc()))
3569 for (
auto &SU : SSD->
SUnits)
3574 while (!Worklist.
empty()) {
3576 if (DoNotPipeline.
count(SU))
3579 DoNotPipeline.
insert(SU);
3586 if (OE.getDistance() == 1)
3589 return DoNotPipeline;
3598 int NewLastCycle = INT_MIN;
3603 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3610 if (IE.getDistance() == 0)
3611 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
3616 if (OE.getDistance() == 1)
3617 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);
3619 int OldCycle = InstrToCycle[&SU];
3620 if (OldCycle != NewCycle) {
3621 InstrToCycle[&SU] = NewCycle;
3626 <<
") is not pipelined; moving from cycle " << OldCycle
3627 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3652 if (FirstCycle + InitiationInterval <= NewCycle)
3655 NewLastCycle = std::max(NewLastCycle, NewCycle);
3657 LastCycle = NewLastCycle;
3674 int CycleDef = InstrToCycle[&SU];
3675 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3677 SUnit *Dst = OE.getDst();
3678 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
3679 if (OE.getReg().isPhysical()) {
3682 if (InstrToCycle[Dst] <= CycleDef)
3700void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3703 typedef std::pair<SUnit *, unsigned> UnitIndex;
3704 std::vector<UnitIndex> Indices(
NodeOrder.size(), std::make_pair(
nullptr, 0));
3706 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i)
3707 Indices.push_back(std::make_pair(
NodeOrder[i], i));
3709 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3710 return std::get<0>(i1) < std::get<0>(i2);
3723 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i) {
3727 bool PredBefore =
false;
3728 bool SuccBefore =
false;
3735 for (
const auto &IE : DDG->getInEdges(SU)) {
3736 SUnit *PredSU = IE.getSrc();
3737 unsigned PredIndex = std::get<1>(
3746 for (
const auto &OE : DDG->getOutEdges(SU)) {
3747 SUnit *SuccSU = OE.getDst();
3753 unsigned SuccIndex = std::get<1>(
3766 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3771 NumNodeOrderIssues++;
3775 <<
" are scheduled before node " << SU->
NodeNum
3782 dbgs() <<
"Invalid node order found!\n";
3795 for (
SUnit *SU : Instrs) {
3797 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3805 InstrChanges.find(SU);
3806 if (It != InstrChanges.
end()) {
3807 unsigned BasePos, OffsetPos;
3809 if (
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos)) {
3813 MI->getOperand(OffsetPos).getImm() - It->second.second;
3826 unsigned TiedUseIdx = 0;
3827 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3829 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3831 NewBaseReg =
MI->getOperand(i).getReg();
3840 const std::deque<SUnit *> &Instrs)
const {
3841 std::deque<SUnit *> NewOrderPhi;
3842 for (
SUnit *SU : Instrs) {
3844 NewOrderPhi.push_back(SU);
3846 std::deque<SUnit *> NewOrderI;
3847 for (
SUnit *SU : Instrs) {
3863 std::deque<SUnit *> &cycleInstrs =
3864 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3866 ScheduledInstrs[cycle].push_front(SU);
3872 for (
int cycle =
getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3873 ScheduledInstrs.erase(cycle);
3883 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3892 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3893 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3894 for (
const auto &
I : Nodes)
3895 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3899#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3906 for (
SUnit *CI : cycleInstrs->second) {
3908 os <<
"(" << CI->
NodeNum <<
") ";
3919void ResourceManager::dumpMRT()
const {
3923 std::stringstream SS;
3925 SS << std::setw(4) <<
"Slot";
3926 for (
unsigned I = 1, E =
SM.getNumProcResourceKinds();
I < E; ++
I)
3927 SS << std::setw(3) <<
I;
3928 SS << std::setw(7) <<
"#Mops"
3930 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3931 SS << std::setw(4) << Slot;
3932 for (
unsigned I = 1, E =
SM.getNumProcResourceKinds();
I < E; ++
I)
3933 SS << std::setw(3) << MRT[Slot][
I];
3934 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3943 unsigned ProcResourceID = 0;
3947 assert(SM.getNumProcResourceKinds() < 64 &&
3948 "Too many kinds of resources, unsupported");
3951 Masks.
resize(SM.getNumProcResourceKinds());
3952 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3954 if (
Desc.SubUnitsIdxBegin)
3956 Masks[
I] = 1ULL << ProcResourceID;
3960 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3962 if (!
Desc.SubUnitsIdxBegin)
3964 Masks[
I] = 1ULL << ProcResourceID;
3965 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3966 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3971 dbgs() <<
"ProcResourceDesc:\n";
3972 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3974 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3975 ProcResource->
Name,
I, Masks[
I],
3978 dbgs() <<
" -----------------\n";
3986 dbgs() <<
"canReserveResources:\n";
3989 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3995 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
4001 reserveResources(SCDesc,
Cycle);
4002 bool Result = !isOverbooked();
4003 unreserveResources(SCDesc,
Cycle);
4012 dbgs() <<
"reserveResources:\n";
4015 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
4021 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
4027 reserveResources(SCDesc,
Cycle);
4032 dbgs() <<
"reserveResources: done!\n\n";
4043 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
4046 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
4055 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
4058 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
4061bool ResourceManager::isOverbooked()
const {
4063 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
4064 for (
unsigned I = 1,
E =
SM.getNumProcResourceKinds();
I <
E; ++
I) {
4065 const MCProcResourceDesc *
Desc =
SM.getProcResource(
I);
4066 if (MRT[Slot][
I] >
Desc->NumUnits)
4069 if (NumScheduledMops[Slot] > IssueWidth)
4075int ResourceManager::calculateResMIIDFA()
const {
4080 FuncUnitSorter FUS = FuncUnitSorter(*ST);
4081 for (SUnit &SU : DAG->
SUnits)
4082 FUS.calcCriticalResources(*SU.
getInstr());
4083 PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
4086 for (SUnit &SU : DAG->
SUnits)
4093 while (!FuncUnitOrder.empty()) {
4094 MachineInstr *
MI = FuncUnitOrder.top();
4095 FuncUnitOrder.pop();
4096 if (
TII->isZeroCost(
MI->getOpcode()))
4102 unsigned ReservedCycles = 0;
4103 auto *RI = Resources.
begin();
4104 auto *RE = Resources.
end();
4106 dbgs() <<
"Trying to reserve resource for " << NumCycles
4107 <<
" cycles for \n";
4110 for (
unsigned C = 0;
C < NumCycles; ++
C)
4112 if ((*RI)->canReserveResources(*
MI)) {
4113 (*RI)->reserveResources(*
MI);
4120 <<
", NumCycles:" << NumCycles <<
"\n");
4122 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
4124 <<
"NewResource created to reserve resources"
4127 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
4128 NewResource->reserveResources(*
MI);
4129 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
4133 int Resmii = Resources.
size();
4140 return calculateResMIIDFA();
4147 for (
SUnit &SU : DAG->SUnits) {
4159 <<
" WriteProcRes: ";
4164 make_range(STI->getWriteProcResBegin(SCDesc),
4165 STI->getWriteProcResEnd(SCDesc))) {
4169 SM.getProcResource(PRE.ProcResourceIdx);
4170 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
4173 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
4178 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
4181 dbgs() <<
"#Mops: " << NumMops <<
", "
4182 <<
"IssueWidth: " << IssueWidth <<
", "
4183 <<
"Cycles: " << Result <<
"\n";
4188 std::stringstream SS;
4189 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
4190 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
4195 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
4197 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
4200 std::stringstream SS;
4201 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
4202 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
4203 << std::setw(10) << Cycles <<
"\n";
4207 if (Cycles > Result)
4214 InitiationInterval =
II;
4215 DFAResources.clear();
4216 DFAResources.resize(
II);
4217 for (
auto &
I : DFAResources)
4218 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
4221 NumScheduledMops.clear();
4222 NumScheduledMops.resize(
II);
4226 if (Pred.isArtificial() || Dst->isBoundaryNode())
4231 return IgnoreAnti && (Pred.getKind() ==
SDep::Kind::Anti || Distance != 0);
4234SwingSchedulerDDG::SwingSchedulerDDGEdges &
4235SwingSchedulerDDG::getEdges(
const SUnit *SU) {
4237 return EntrySUEdges;
4243const SwingSchedulerDDG::SwingSchedulerDDGEdges &
4244SwingSchedulerDDG::getEdges(
const SUnit *SU)
const {
4246 return EntrySUEdges;
4252void SwingSchedulerDDG::addEdge(
const SUnit *SU,
4253 const SwingSchedulerDDGEdge &
Edge) {
4255 "Validation-only edges are not expected here.");
4256 auto &Edges = getEdges(SU);
4257 if (
Edge.getSrc() == SU)
4258 Edges.Succs.push_back(
Edge);
4260 Edges.Preds.push_back(
Edge);
4263void SwingSchedulerDDG::initEdges(SUnit *SU) {
4264 for (
const auto &PI : SU->
Preds) {
4265 SwingSchedulerDDGEdge
Edge(SU, PI,
false,
4270 for (
const auto &SI : SU->
Succs) {
4271 SwingSchedulerDDGEdge
Edge(SU, SI,
true,
4279 : EntrySU(EntrySU), ExitSU(ExitSU) {
4280 EdgesVec.resize(SUnits.size());
4285 for (
auto &SU : SUnits)
4289 for (
SUnit &SU : SUnits) {
4294 for (
SUnit *Dst : *OD) {
4297 Edge.setDistance(1);
4298 ValidationOnlyEdges.push_back(Edge);
4304const SwingSchedulerDDG::EdgesType &
4306 return getEdges(SU).Preds;
4309const SwingSchedulerDDG::EdgesType &
4311 return getEdges(SU).Succs;
4318 auto ExpandCycle = [&](
SUnit *SU) {
4325 SUnit *Src = Edge.getSrc();
4326 SUnit *Dst = Edge.getDst();
4327 if (!Src->isInstr() || !Dst->isInstr())
4329 int CycleSrc = ExpandCycle(Src);
4330 int CycleDst = ExpandCycle(Dst);
4331 int MaxLateStart = CycleDst + Edge.getDistance() *
II - Edge.getLatency();
4332 if (CycleSrc > MaxLateStart) {
4334 dbgs() <<
"Validation failed for edge from " << Src->NodeNum <<
" to "
4335 << Dst->NodeNum <<
"\n";
4345 for (
SUnit &SU : SUnits) {
4374 !
TII->isGlobalMemoryObject(FromMI) &&
4392 const auto DumpSU = [](
const SUnit *SU) {
4393 std::ostringstream OSS;
4394 OSS <<
"SU(" << SU->
NodeNum <<
")";
4398 dbgs() <<
" Loop carried edges from " << DumpSU(SU) <<
"\n"
4400 for (
SUnit *Dst : *Order)
4401 dbgs() <<
" " << DumpSU(Dst) <<
"\n";
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::optional< unsigned > getTag(const TargetRegisterInfo *TRI, const MachineInstr &MI, const LoadInfo &LI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
print mir2vec MIR2Vec Vocabulary Printer Pass
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static bool hasPHICycleDFS(unsigned Reg, const DenseMap< unsigned, SmallVector< unsigned, 2 > > &PhiDeps, SmallSet< unsigned, 8 > &Visited, SmallSet< unsigned, 8 > &RecStack)
Depth-first search to detect cycles among PHI dependencies.
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static bool findLoopIncrementValue(const MachineOperand &Op, int &Value)
When Op is a value that is incremented recursively in a loop and there is a unique instruction that i...
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static cl::opt< unsigned > SwpMaxNumStores("pipeliner-max-num-stores", cl::desc("Maximum number of stores allwed in the target loop."), cl::Hidden, cl::init(200))
A command line argument to limit the number of store instructions in the target basic block.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static bool hasPHICycle(const MachineBasicBlock *LoopHeader, const MachineRegisterInfo &MRI)
static cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, BatchAAResults &BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const SwingSchedulerDAG *SSD)
Returns true if there is a loop-carried order dependency from Src to Dst.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
Promote Memory to Register
This file provides utility analysis objects describing memory locations.
static constexpr unsigned SM(unsigned Version)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
Remove Loads Into Fake Uses
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
Add loop-carried chain dependencies.
void computeDependencies()
The main function to compute loop-carried order-dependencies.
const BitVector & getLoopCarried(unsigned Idx) const
LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, BatchAAResults *BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR changes in between queries.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
AttributeList getAttributes() const
Return the attribute list for this Function.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isPostIncrement(const MachineInstr &MI) const override
Return true for post-incremented instructions.
DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override
Create machine specific model for scheduling.
bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const override
For instructions with a base and offset, return the position of the base register and offset operands...
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
Represents a single loop in the control flow graph.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PointerIntPair - This class implements a pair of a pointer and small integer.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind
These are the different kinds of scheduling dependencies.
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is scheduled or the EndCycle is reached.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SmallPtrSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void dump() const override
LLVM_ABI void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
LLVM_ABI bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
typename vector_type::const_iterator iterator
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
void clear()
Completely clear the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than a fixed size).
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
Represents a dependence between two instruction.
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
const EdgesType & getInEdges(const SUnit *SU) const
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool enableMachinePipeliner() const
True if the subtarget should run MachinePipeliner.
virtual bool useDFAforSMS() const
Default to DFA for resource management, return false when target will use ProcResource in InstrSchedM...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Wrapper class representing a virtual register or register unit.
constexpr bool isVirtualReg() const
constexpr MCRegUnit asMCRegUnit() const
constexpr Register asVirtualReg() const
The main class in the implementation of the target independent window scheduler.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
@ Valid
The data is already valid.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< DefNode * > Def
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
std::set< NodeId > NodeSet
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
@ WS_Force
Use window algorithm instead of SMS algorithm.
@ WS_On
Use window algorithm after SMS algorithm fails.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
RegState getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
cl::opt< bool > SwpEnableCopyToPhi
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
LLVM_ABI char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
@ Increment
Incrementally increasing token ID.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This class holds an SUnit corresponding to a memory operation and other information related to the in...
const Value * MemOpValue
The value of a memory operand.
SmallVector< const Value *, 2 > UnderlyingObjs
bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const
int64_t MemOpOffset
The offset of a memory operand.
bool IsAllIdentified
True if all the underlying objects are identified.
SUnitWithMemInfo(SUnit *SU)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
uint64_t FuncUnits
Bitmask representing a set of functional units.
static constexpr LaneBitmask getNone()
Represents loop-carried dependencies.
SmallSetVector< SUnit *, 8 > OrderDep
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.