#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "pipeliner"

STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
STATISTIC(NumPipelined, "Number of loops software pipelined");
STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
STATISTIC(NumFailTooManyStores, "Pipeliner abort due to too many stores");
static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
                               cl::desc("Enable Software Pipelining"));

static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
                              cl::desc("Size limit for the MII."), cl::Hidden,
                              cl::init(27));

static cl::opt<int> SwpForceII("pipeliner-force-ii",
                               cl::desc("Force pipeliner to use specified II."),
                               cl::Hidden, cl::init(-1));

static cl::opt<int> SwpMaxStages(
    "pipeliner-max-stages",
    cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden,
    cl::init(3));

static cl::opt<bool>
    SwpPruneDeps("pipeliner-prune-deps",
                 cl::desc("Prune dependences between unrelated Phi nodes."),
                 cl::Hidden, cl::init(true));

static cl::opt<bool>
    SwpPruneLoopCarried("pipeliner-prune-loop-carried",
                        cl::desc("Prune loop carried order dependences."),
                        cl::Hidden, cl::init(true));

static cl::opt<bool> EmitTestAnnotations(
    "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
    cl::desc("Instead of emitting the pipelined code, annotate instructions "
             "with the generated schedule for feeding into the "
             "-modulo-schedule-test pass"));

static cl::opt<bool> ExperimentalCodeGen(
    "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
    cl::desc(
        "Use the experimental peeling code generator for software pipelining"));

static cl::opt<bool>
    LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false),
                     cl::desc("Limit register pressure of scheduled loop"));

static cl::opt<int>
    RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden,
                      cl::init(5),
                      cl::desc("Margin representing the unused percentage of "
                               "the register pressure limit"));

static cl::opt<bool>
    MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
               cl::desc("Use the MVE code generator for software pipelining"));

static cl::opt<unsigned> SwpMaxNumStores(
    "pipeliner-max-num-stores",
    cl::desc("Maximum number of stores allowed in the target loop."),
    cl::Hidden, cl::init(200));

static cl::opt<bool>
    SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
                       cl::init(true),
                       cl::desc("Enable CopyToPhi DAG Mutation"));

static cl::opt<int64_t> SwpForceIssueWidth(
    "pipeliner-force-issue-width",
    cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
    cl::init(-1));

static cl::opt<WindowSchedulingFlag> WindowSchedulingOption(
    "window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On),
    cl::desc("Set how to use window scheduling algorithm."),
    cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off",
                          "Turn off window algorithm."),
               clEnumValN(WindowSchedulingFlag::WS_On, "on",
                          "Use window algorithm after SMS algorithm fails."),
               clEnumValN(WindowSchedulingFlag::WS_Force, "force",
                          "Use window algorithm instead of SMS algorithm.")));
unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;

INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining",
                    false, false)
  enum class InstrTag {

  TaggedSUnit(SUnit *SU, InstrTag Tag)

  InstrTag getTag() const { return InstrTag(getInt()); }

  struct LoadStoreChunk {
    void append(SUnit *SU);
  };

  std::vector<SUnit> &SUnits;
  std::vector<BitVector> LoopCarried;
  std::vector<TaggedSUnit> TaggedSUnits;

    return LoopCarried[Idx];

  std::optional<InstrTag> getInstrTag(SUnit *SU) const;
  void addLoopCarriedDepenenciesForChunks(const LoadStoreChunk &From,
                                          const LoadStoreChunk &To);
  void computeDependenciesAux();
  TII = MF->getSubtarget().getInstrInfo();

  bool Changed = false;
  for (const auto &L : *MLI)
    Changed |= scheduleLoop(*L);
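/// Attempt to perform the SMS algorithm on the specified loop. This function
/// returns true if the loop was successfully pipelined. Inner loops are
/// handled first, since an outer loop can only be considered once its nested
/// loops have been processed.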
bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
  bool Changed = false;
  for (const auto &InnerLoop : L)
    Changed |= scheduleLoop(*InnerLoop);

  setPragmaPipelineOptions(L);
  if (!canPipelineLoop(L)) {
    ORE->emit([&]() {
      return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop",
                                             L.getStartLoc(), L.getHeader())
             << "Failed to pipeline loop";
    });
    LI.LoopPipelinerInfo.reset();
    return Changed;
  }

  if (useSwingModuloScheduler())
    Changed = swingModuloScheduler(L);

  if (useWindowScheduler(Changed))
    Changed = runWindowScheduler(L);

  LI.LoopPipelinerInfo.reset();
  return Changed;
}
void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
  MachineBasicBlock *LBLK = L.getTopBlock();

  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
  if (LoopID == nullptr)
    return;

    if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
      assert(MD->getNumOperands() == 2 &&
             "Pipeline initiation interval hint metadata should have two operands.");
    } else if (S->getString() == "llvm.loop.pipeline.disable") {

static bool hasPHICycleDFS(
    unsigned Reg, const DenseMap<unsigned, SmallVector<unsigned, 2>> &PhiDeps,
    SmallSet<unsigned, 8> &Visited, SmallSet<unsigned, 8> &RecStack) {
  auto It = PhiDeps.find(Reg);
  if (It == PhiDeps.end())
    return false;

  for (unsigned Dep : It->second) {

static bool hasPHICycle(const MachineBasicBlock *LoopHeader,
                        const MachineRegisterInfo &MRI) {
    unsigned DefReg = MI.getOperand(0).getReg();
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2)
      Ins->second.push_back(MI.getOperand(I).getReg());

  for (const auto &KV : PhiDeps) {
    unsigned Reg = KV.first;
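/// Return true if we can pipeline the specified loop. The loop must be a
/// single basic block whose branch and loop structure the target can analyze,
/// and it must have a preheader; otherwise the corresponding remark is emitted
/// and pipelining is abandoned.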
bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
  if (L.getNumBlocks() != 1) {
    ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "Not a single basic block: "
             << ore::NV("NumBlocks", L.getNumBlocks());
    });
    return false;
  }

    ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "Disabled by Pragma.";
    });

  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
    LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
    NumFailBranch++;
    ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "The branch can't be understood";
    });
    return false;
  }

  LI.LoopInductionVar = nullptr;
  LI.LoopCompare = nullptr;
  LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock());
  if (!LI.LoopPipelinerInfo) {
    LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
    NumFailLoop++;
    ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "The loop structure is not supported";
    });
    return false;
  }

  if (!L.getLoopPreheader()) {
    LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
    NumFailPreheader++;
    ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "No loop preheader found";
    });
    return false;
  }

  unsigned NumStores = 0;
  for (MachineInstr &MI : *L.getHeader())
    if (MI.mayStore())
      ++NumStores;
  if (NumStores > SwpMaxNumStores) {
    NumFailTooManyStores++;
    ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "Too many store instructions in the loop: "
             << ore::NV("NumStores", NumStores) << " > "
             << ore::NV("SwpMaxNumStores", SwpMaxNumStores);
    });
    return false;
  }

  preprocessPhiNodes(*L.getHeader());
  return true;
}
void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
  MachineRegisterInfo &MRI = MF->getRegInfo();

  for (MachineInstr &PI : B.phis()) {
    MachineOperand &DefOp = PI.getOperand(0);
    auto *RC = MRI.getRegClass(DefOp.getReg());

    for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
      MachineOperand &RegOp = PI.getOperand(i);
      MachineBasicBlock &PredB = *PI.getOperand(i + 1).getMBB();
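/// The SMS algorithm consists of three main steps after computing the
/// dependence graph: computation of the node order, scheduling the loop at a
/// given initiation interval, and generating the pipelined kernel, prolog,
/// and epilog.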
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");

  SwingSchedulerDAG SMS(

  MachineBasicBlock *MBB = L.getHeader();

  return SMS.hasNewSchedule();
}

bool MachinePipeliner::runWindowScheduler(MachineLoop &L) {
  Context.RegClassInfo->runOnMachineFunction(*MF);

bool MachinePipeliner::useSwingModuloScheduler() {

bool MachinePipeliner::useWindowScheduler(bool Changed) {
                       "llvm.loop.pipeline.initiationinterval is set.\n");
void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
  if (SwpForceII > 0)
    MII = SwpForceII;
  else if (II_setByPragma > 0)
    MII = II_setByPragma;
  else
    MII = std::max(ResMII, RecMII);
}

void SwingSchedulerDAG::setMAX_II() {
  if (SwpForceII > 0)
    MAX_II = SwpForceII;
  else if (II_setByPragma > 0)
    MAX_II = II_setByPragma;
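/// We override the schedule function in ScheduleDAGInstrs to implement the
/// scheduling part of the Swing Modulo Scheduling algorithm.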
void SwingSchedulerDAG::schedule() {
  updatePhiDependences();
  Topo.InitDAGTopologicalSorting();

    dbgs() << "===== Loop Carried Edges Begin =====\n";
    dbgs() << "===== Loop Carried Edges End =====\n";

  NodeSetType NodeSets;
  findCircuits(NodeSets);
  NodeSetType Circuits = NodeSets;

  unsigned ResMII = calculateResMII();
  unsigned RecMII = calculateRecMII(NodeSets);

  setMII(ResMII, RecMII);
  LLVM_DEBUG(dbgs() << "MII = " << MII
                    << " (rec=" << RecMII << ", res=" << ResMII << ")\n");

    Pass.ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(
                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "Invalid Minimal Initiation Interval: 0";
    });

    LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
                      << ", we don't pipeline large loops\n");
    NumFailLargeMaxMII++;
    Pass.ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(
                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "Minimal Initiation Interval too large: "
             << ore::NV("MII", (int)MII) << " > "
             << "Refer to -pipeliner-max-mii.";
    });

  computeNodeFunctions(NodeSets);
  registerPressureFilter(NodeSets);
  colocateNodeSets(NodeSets);
  checkNodeSets(NodeSets);

  LLVM_DEBUG({
    for (auto &I : NodeSets) {
      dbgs() << "  Rec NodeSet ";
    }
  });

  groupRemainingNodes(NodeSets);
  removeDuplicateNodes(NodeSets);

  LLVM_DEBUG({
    for (auto &I : NodeSets) {
      dbgs() << "  NodeSet ";
    }
  });

  computeNodeOrder(NodeSets);
  checkValidNodeOrder(Circuits);

  Scheduled = schedulePipeline(Schedule);

    Pass.ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(
                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "Unable to find schedule";
    });

  if (numStages == 0) {
    Pass.ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(
                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "No need to pipeline - no overlapped iterations in schedule.";
    });

    LLVM_DEBUG(dbgs() << numStages << " : too many stages, abort\n");
    NumFailLargeMaxStage++;
    Pass.ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(
                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "Too many stages in schedule: "
             << ore::NV("numStages", (int)numStages) << " > "
             << ". Refer to -pipeliner-max-stages.";
    });

  Pass.ORE->emit([&]() {
             << "Pipelined successfully!";
  });

  std::vector<MachineInstr *> OrderedInsts;
      OrderedInsts.push_back(SU->getInstr());
      Cycles[SU->getInstr()] = Cycle;

    for (auto &KV : NewMIs) {
      Cycles[KV.first] = Cycles[KV.second];
      Stages[KV.first] = Stages[KV.second];
      NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
    }

           "Cannot serialize a schedule with InstrChanges!");

      LoopPipelinerInfo->isMVEExpanderSupported() &&

  for (auto &KV : NewMIs)
    MF.deleteMachineInstr(KV.second);
}
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
                       Register &InitVal, Register &LoopVal) {
  assert(Phi.isPHI() && "Expecting a Phi.");

  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
    if (Phi.getOperand(i + 1).getMBB() != Loop)
      InitVal = Phi.getOperand(i).getReg();
    else
      LoopVal = Phi.getOperand(i).getReg();

  assert(InitVal && LoopVal && "Unexpected Phi structure.");
}

static Register getLoopPhiReg(const MachineInstr &Phi,
                              const MachineBasicBlock *LoopBB) {
  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
    if (Phi.getOperand(i + 1).getMBB() == LoopBB)
      return Phi.getOperand(i).getReg();
  return Register();
}

static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
  while (!Worklist.empty()) {
    for (const auto &SI : SU->Succs) {
      if (Visited.count(SuccSU))
  if (!getUnderlyingObjects())

bool SUnitWithMemInfo::getUnderlyingObjects() {
  if (!MI->hasOneMemOperand())

                                 const SUnitWithMemInfo &Dst,
  if (Src.isTriviallyDisjoint(Dst))

  if (Src.isUnknown() || Dst.isUnknown())
  if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset)

  for (const Value *SrcObj : Src.UnderlyingObjs)
    for (const Value *DstObj : Dst.UnderlyingObjs)

void LoopCarriedOrderDepsTracker::LoadStoreChunk::append(SUnit *SU) {
  if (!MI->mayLoadOrStore())
    return;
  (MI->mayStore() ? Stores : Loads).emplace_back(SU);
}

    : DAG(SSD), BAA(BAA), SUnits(DAG->SUnits), N(SUnits.size()),
      LoopCarried(N, BitVector(N)), TII(TII), TRI(TRI) {}
void LoopCarriedOrderDepsTracker::computeDependencies() {
  for (auto &SU : SUnits) {
    auto Tagged = getInstrTag(&SU);
    if (!Tagged)
      continue;
    TaggedSUnits.emplace_back(&SU, *Tagged);
  }

  computeDependenciesAux();
}

std::optional<LoopCarriedOrderDepsTracker::InstrTag>
LoopCarriedOrderDepsTracker::getInstrTag(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();
  if (TII->isGlobalMemoryObject(MI))
    return InstrTag::Barrier;

  if (MI->mayStore() ||
      (MI->mayLoad() && !MI->isDereferenceableInvariantLoad()))
    return InstrTag::LoadOrStore;

  if (MI->mayRaiseFPException())
    return InstrTag::FPExceptions;

  return std::nullopt;
}

void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
    const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst) {
  if (Src.SU == Dst.SU)
    return;

  LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
}

void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
    const LoadStoreChunk &From, const LoadStoreChunk &To) {
  for (const SUnitWithMemInfo &Src : From.Loads)
    for (const SUnitWithMemInfo &Dst : To.Stores)
      addDependenciesBetweenSUs(Src, Dst);

  for (const SUnitWithMemInfo &Src : From.Stores)
    for (const SUnitWithMemInfo &Dst : To.Loads)
      addDependenciesBetweenSUs(Src, Dst);

  for (const SUnitWithMemInfo &Src : From.Stores)
    for (const SUnitWithMemInfo &Dst : To.Stores)
      addDependenciesBetweenSUs(Src, Dst);
}

void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
  for (const auto &TSU : TaggedSUnits) {
    InstrTag Tag = TSU.getTag();
    SUnit *SU = TSU.getPointer();
    switch (Tag) {
    case InstrTag::Barrier:
      Chunks.emplace_back();
      break;
    case InstrTag::LoadOrStore:
      Chunks.back().append(SU);
      break;
    case InstrTag::FPExceptions:
      break;
    }
  }

  for (const LoadStoreChunk &Chunk : Chunks)
    addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
}
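/// Collect the loop-carried order dependences discovered by the tracker above
/// and record them as loop-carried edges of the DAG.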
LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() {
  LoopCarriedEdges LCE;

  LCODTracker.computeDependencies();
  for (unsigned I = 0; I != SUnits.size(); I++)
    for (const int Succ : LCODTracker.getLoopCarried(I).set_bits())
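/// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
/// processes dependences for PHIs.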
void SwingSchedulerDAG::updatePhiDependences() {
  const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();

  for (SUnit &I : SUnits) {
    MachineInstr *MI = I.getInstr();
    for (const MachineOperand &MO : MI->operands()) {
          UI = MRI.use_instr_begin(Reg),
          UE = MRI.use_instr_end();
          MachineInstr *UseMI = &*UI;
          SUnit *SU = getSUnit(UseMI);
      } else if (MO.isUse()) {
        if (DefMI == nullptr)
          continue;
        SUnit *SU = getSUnit(DefMI);
          ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep,
          if (SU->NodeNum < I.NodeNum && !I.isPred(SU))

    for (auto &PI : I.Preds) {
      MachineInstr *PMI = PI.getSUnit()->getInstr();
      if (I.getInstr()->isPHI()) {

    for (const SDep &D : RemoveDeps)
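/// Iterate over each DAG node and see if we can change any dependences in
/// order to reduce the recurrence MII, e.g., by letting a load use the base
/// address value computed for the previous iteration.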
void SwingSchedulerDAG::changeDependences() {
  for (SUnit &I : SUnits) {
    unsigned BasePos = 0, OffsetPos = 0;
    int64_t NewOffset = 0;
    if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
                               NewOffset))
      continue;

    Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
    MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
    SUnit *DefSU = getSUnit(DefMI);

    MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
    SUnit *LastSU = getSUnit(LastMI);

    if (Topo.IsReachable(&I, LastSU))
      continue;

    for (const SDep &P : I.Preds)
      if (P.getSUnit() == DefSU)
    for (const SDep &D : Deps) {
      Topo.RemovePred(&I, D.getSUnit());
    }

    for (auto &P : LastSU->Preds)
    for (const SDep &D : Deps) {
      Topo.RemovePred(LastSU, D.getSUnit());
    }

    Topo.AddPred(LastSU, &I);

    InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
  }
}
                                  std::vector<MachineInstr *> &OrderedInsts,
       Stage <= LastStage; ++Stage) {
        Instrs[Cycle].push_front(SU);

    std::deque<SUnit *> &CycleInstrs = Instrs[Cycle];
    for (SUnit *SU : CycleInstrs) {
      OrderedInsts.push_back(MI);
struct FuncUnitSorter {
  const InstrItineraryData *InstrItins;
  const MCSubtargetInfo *STI;
  DenseMap<InstrStage::FuncUnits, unsigned> Resources;

  FuncUnitSorter(const TargetSubtargetInfo &TSI)
      : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}

  unsigned minFuncUnits(const MachineInstr *Inst,
    unsigned min = UINT_MAX;
    if (InstrItins && !InstrItins->isEmpty()) {
      for (const InstrStage &IS :
                      InstrItins->endStage(SchedClass))) {
        if (numAlternatives < min) {
          min = numAlternatives;

      const MCSchedClassDesc *SCDesc =
      for (const MCWriteProcResEntry &PRE :
        if (!PRE.ReleaseAtCycle)
          continue;
        const MCProcResourceDesc *ProcResource =
        unsigned NumUnits = ProcResource->NumUnits;
        if (NumUnits < min) {
          F = PRE.ProcResourceIdx;

    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

  void calcCriticalResources(MachineInstr &MI) {
    unsigned SchedClass = MI.getDesc().getSchedClass();
    if (InstrItins && !InstrItins->isEmpty()) {
      for (const InstrStage &IS :
                      InstrItins->endStage(SchedClass))) {
        Resources[FuncUnits]++;

      const MCSchedClassDesc *SCDesc =
      for (const MCWriteProcResEntry &PRE :
        if (!PRE.ReleaseAtCycle)
          continue;
        Resources[PRE.ProcResourceIdx]++;

    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
    unsigned MFUs1 = minFuncUnits(IS1, F1);
    unsigned MFUs2 = minFuncUnits(IS2, F2);
    return MFUs1 > MFUs2;
  }
};
class HighRegisterPressureDetector {
  MachineBasicBlock *OrigMBB;
  const MachineRegisterInfo &MRI;
  const TargetRegisterInfo *TRI;
  const unsigned PSetNum;

  std::vector<unsigned> InitSetPressure;
  std::vector<unsigned> PressureSetLimit;

  DenseMap<MachineInstr *, RegisterOperands> ROMap;

  using Instr2LastUsesTy = DenseMap<MachineInstr *, SmallDenseSet<Register, 4>>;
  using OrderedInstsTy = std::vector<MachineInstr *>;
  using Instr2StageTy = DenseMap<MachineInstr *, unsigned>;

  static void dumpRegisterPressures(const std::vector<unsigned> &Pressures) {
    if (Pressures.size() == 0) {
    for (unsigned P : Pressures) {

    VirtRegOrUnit VRegOrUnit =
                 : VirtRegOrUnit(static_cast<MCRegUnit>(Reg.id()));
    for (auto PSetIter = MRI.getPressureSets(VRegOrUnit); PSetIter.isValid();
      dbgs() << *PSetIter << ' ';

  void increaseRegisterPressure(std::vector<unsigned> &Pressure,
    VirtRegOrUnit VRegOrUnit =
                 : VirtRegOrUnit(static_cast<MCRegUnit>(Reg.id()));
    auto PSetIter = MRI.getPressureSets(VRegOrUnit);
    unsigned Weight = PSetIter.getWeight();
    for (; PSetIter.isValid(); ++PSetIter)
      Pressure[*PSetIter] += Weight;
  }

  void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
    auto PSetIter = MRI.getPressureSets(VirtRegOrUnit(Reg));
    unsigned Weight = PSetIter.getWeight();
    for (; PSetIter.isValid(); ++PSetIter) {
      auto &P = Pressure[*PSetIter];
      assert(P >= Weight &&
             "register pressure must be greater than or equal weight");
  void computeLiveIn() {
    DenseSet<Register> Used;
    for (auto &MI : *OrigMBB) {
      if (MI.isDebugInstr())
        continue;
      for (auto &Use : ROMap[&MI].Uses) {
            Use.VRegOrUnit.isVirtualReg()
                ? Use.VRegOrUnit.asVirtualReg()
                : Register(static_cast<unsigned>(Use.VRegOrUnit.asMCRegUnit()));
        if (isReservedRegister(Reg))
          continue;
        if (isDefinedInThisLoop(Reg))
          continue;

    for (auto LiveIn : Used)
      increaseRegisterPressure(InitSetPressure, LiveIn);
  }
  void computePressureSetLimit(const RegisterClassInfo &RCI) {
    for (unsigned PSet = 0; PSet < PSetNum; PSet++)

  Instr2LastUsesTy computeLastUses(const OrderedInstsTy &OrderedInsts,
                                   Instr2StageTy &Stages) const {
    DenseSet<Register> TargetRegs;
    const auto UpdateTargetRegs = [this, &TargetRegs](Register Reg) {
      if (isDefinedInThisLoop(Reg))

    for (MachineInstr *MI : OrderedInsts) {
        UpdateTargetRegs(Reg);
      for (auto &Use : ROMap.find(MI)->getSecond().Uses) {
            ? Use.VRegOrUnit.asVirtualReg()
                  Use.VRegOrUnit.asMCRegUnit()));
        UpdateTargetRegs(Reg);

    const auto InstrScore = [&Stages](MachineInstr *MI) {
      return Stages[MI] + MI->isPHI();
    };

    DenseMap<Register, MachineInstr *> LastUseMI;
      for (auto &Use : ROMap.find(MI)->getSecond().Uses) {
            Use.VRegOrUnit.isVirtualReg()
                ? Use.VRegOrUnit.asVirtualReg()
                : Register(static_cast<unsigned>(Use.VRegOrUnit.asMCRegUnit()));
          MachineInstr *Orig = Ite->second;
          MachineInstr *New = MI;
          if (InstrScore(Orig) < InstrScore(New))

    Instr2LastUsesTy LastUses;
    for (auto [Reg, MI] : LastUseMI)
      LastUses[MI].insert(Reg);
  std::vector<unsigned>
  computeMaxSetPressure(const OrderedInstsTy &OrderedInsts,
                        Instr2StageTy &Stages,
                        const unsigned StageCount) const {
    using RegSetTy = SmallDenseSet<Register, 16>;

    auto CurSetPressure = InitSetPressure;
    auto MaxSetPressure = InitSetPressure;
    auto LastUses = computeLastUses(OrderedInsts, Stages);

    LLVM_DEBUG({
      dbgs() << "Ordered instructions:\n";
      for (MachineInstr *MI : OrderedInsts) {
        dbgs() << "Stage " << Stages[MI] << ": ";
      }
    });

    const auto InsertReg = [this, &CurSetPressure](RegSetTy &RegSet,
                                                   VirtRegOrUnit VRegOrUnit) {
      increaseRegisterPressure(CurSetPressure, Reg);
    };

    const auto EraseReg = [this, &CurSetPressure](RegSetTy &RegSet,
      if (!RegSet.contains(Reg))
        return;
      decreaseRegisterPressure(CurSetPressure, Reg);
    };

    for (unsigned I = 0; I < StageCount; I++) {
      for (MachineInstr *MI : OrderedInsts) {
        const auto Stage = Stages[MI];
        const unsigned Iter = I - Stage;

        for (auto &Def : ROMap.find(MI)->getSecond().Defs)
          InsertReg(LiveRegSets[Iter], Def.VRegOrUnit);

        for (auto LastUse : LastUses[MI]) {
            EraseReg(LiveRegSets[Iter - 1], LastUse);
            EraseReg(LiveRegSets[Iter], LastUse);
        }

        for (unsigned PSet = 0; PSet < PSetNum; PSet++)
          MaxSetPressure[PSet] =
              std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);

        LLVM_DEBUG({
          dbgs() << "CurSetPressure=";
          dumpRegisterPressures(CurSetPressure);
          dbgs() << " iter=" << Iter << " stage=" << Stage << ":";
        });
      }
    }

    return MaxSetPressure;
  }
  HighRegisterPressureDetector(MachineBasicBlock *OrigMBB,
                               const MachineFunction &MF)
      : OrigMBB(OrigMBB), MRI(MF.getRegInfo()),
        TRI(MF.getSubtarget().getRegisterInfo()),
        PSetNum(TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
        PressureSetLimit(PSetNum, 0) {}

  void init(const RegisterClassInfo &RCI) {
    for (MachineInstr &MI : *OrigMBB) {
      if (MI.isDebugInstr())
        continue;
      ROMap[&MI].collect(MI, *TRI, MRI, false, true);
    }

    computePressureSetLimit(RCI);
  }

  bool detect(const SwingSchedulerDAG *SSD, SMSchedule &Schedule,
              const unsigned MaxStage) const {
    assert(0 <= RegPressureMargin && RegPressureMargin <= 100 &&
           "the percentage of the margin must be between 0 to 100");

    OrderedInstsTy OrderedInsts;
    Instr2StageTy Stages;
    const auto MaxSetPressure =
        computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);

    LLVM_DEBUG({
      dbgs() << "Dump MaxSetPressure:\n";
      for (unsigned I = 0; I < MaxSetPressure.size(); I++) {
        dbgs() << format("MaxSetPressure[%d]=%d\n", I, MaxSetPressure[I]);
      }
    });

    for (unsigned PSet = 0; PSet < PSetNum; PSet++) {
      unsigned Limit = PressureSetLimit[PSet];
                        << " Margin=" << Margin << "\n");
      if (Limit < MaxSetPressure[PSet] + Margin) {
        LLVM_DEBUG(
            dbgs()
            << "Rejected the schedule because of too high register pressure\n");
unsigned SwingSchedulerDAG::calculateResMII() {
  ResourceManager RM(&MF.getSubtarget(), this);
  return RM.calculateResMII();
}

unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
  unsigned RecMII = 0;

  for (NodeSet &Nodes : NodeSets) {
    unsigned Delay = Nodes.getLatency();
    unsigned Distance = 1;

    unsigned CurMII = (Delay + Distance - 1) / Distance;
    Nodes.setRecMII(CurMII);
    if (CurMII > RecMII)
void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
    SwingSchedulerDAG *DAG) {
  BitVector Added(SUnits.size());
  DenseMap<int, int> OutputDeps;
  for (int i = 0, e = SUnits.size(); i != e; ++i) {
    for (auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
      if (OE.isOutputDep()) {
        int N = OE.getDst()->NodeNum;
        auto Dep = OutputDeps.find(BackEdge);
        if (Dep != OutputDeps.end()) {
          BackEdge = Dep->second;
          OutputDeps.erase(Dep);
        }
        OutputDeps[N] = BackEdge;
      }

      if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
        continue;

      int N = OE.getDst()->NodeNum;
      AdjK[i].push_back(N);
    }

    for (auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
      SUnit *Src = IE.getSrc();
      SUnit *Dst = IE.getDst();
      if (IE.isOrderDep() && Src->getInstr()->mayLoad()) {
        int N = Src->NodeNum;
        AdjK[i].push_back(N);
      }
    }
  }

  for (auto &OD : OutputDeps)
    if (!Added.test(OD.second)) {
      AdjK[OD.first].push_back(OD.second);
      Added.set(OD.second);
    }
}
bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
                                          const SwingSchedulerDAG *DAG,
  SUnit *SV = &SUnits[V];

  for (auto W : AdjK[V]) {
    if (NumPaths > MaxPaths)
      break;
    if (!Blocked.test(W)) {
      if (circuit(W, S, NodeSets, DAG,
                  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))

    for (auto W : AdjK[V]) {

void SwingSchedulerDAG::Circuits::unblock(int U) {
  SmallPtrSet<SUnit *, 4> &BU = B[U];
  while (!BU.empty()) {
    SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();
    assert(SI != BU.end() && "Invalid B set.");
    if (Blocked.test(W->NodeNum))
      unblock(W->NodeNum);
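/// Identify all the elementary circuits (recurrences) in the dependence graph
/// using the adjacency structure built above, as described in Johnson's
/// circuit-finding algorithm.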
void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
  Circuits Cir(SUnits, Topo);
  Cir.createAdjacencyStructure(this);
  for (int I = 0, E = SUnits.size(); I != E; ++I) {
    Cir.circuit(I, I, NodeSets, this);
void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    for (auto &Dep : SU.Preds) {
      SUnit *TmpSU = Dep.getSUnit();
      MachineInstr *TmpMI = TmpSU->getInstr();

    if (PHISUs.size() == 0 || SrcSUs.size() == 0)
      continue;

    for (auto &Dep : PHISUs[Index]->Succs) {
      SUnit *TmpSU = Dep.getSUnit();
      MachineInstr *TmpMI = TmpSU->getInstr();

    if (UseSUs.size() == 0)
      continue;

    for (auto *I : UseSUs) {
      for (auto *Src : SrcSUs) {
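/// Compute several functions needed to order the nodes: ASAP (earliest start
/// cycle), ALAP (latest start cycle), mobility (MOV), and the zero-latency
/// depth/height of each node.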
void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
  ScheduleInfo.resize(SUnits.size());

  for (int I : Topo) {
    const SUnit &SU = SUnits[I];

  for (int I : Topo) {
    int asap = 0;
    int zeroLatencyDepth = 0;
    SUnit *SU = &SUnits[I];
    for (const auto &IE : DDG->getInEdges(SU)) {
      SUnit *Pred = IE.getSrc();
      if (IE.getLatency() == 0)
        zeroLatencyDepth =
            std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1);
      if (IE.ignoreDependence(true))
        continue;
      asap = std::max(asap, (int)(getASAP(Pred) + IE.getLatency() -
                                  IE.getDistance() * MII));
    }
    maxASAP = std::max(maxASAP, asap);
    ScheduleInfo[I].ASAP = asap;
    ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
  }

  for (int I : llvm::reverse(Topo)) {
    int alap = maxASAP;
    int zeroLatencyHeight = 0;
    SUnit *SU = &SUnits[I];
    for (const auto &OE : DDG->getOutEdges(SU)) {
      SUnit *Succ = OE.getDst();
      if (OE.getLatency() == 0)
        zeroLatencyHeight =
            std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1);
      if (OE.ignoreDependence(true))
        continue;
      alap = std::min(alap, (int)(getALAP(Succ) - OE.getLatency() +
                                  OE.getDistance() * MII));
    }
    ScheduleInfo[I].ALAP = alap;
    ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;
  }

  for (NodeSet &I : NodeSets)
    I.computeNodeSetInfo(this);

  LLVM_DEBUG({
    for (unsigned i = 0; i < SUnits.size(); i++) {
      dbgs() << "\tNode " << i << ":\n";
      dbgs() << "\t   ASAP = " << getASAP(&SUnits[i]) << "\n";
      dbgs() << "\t   ALAP = " << getALAP(&SUnits[i]) << "\n";
      dbgs() << "\t   MOV  = " << getMOV(&SUnits[i]) << "\n";
      dbgs() << "\t   D    = " << getDepth(&SUnits[i]) << "\n";
      dbgs() << "\t   H    = " << getHeight(&SUnits[i]) << "\n";
      dbgs() << "\t   ZLD  = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
      dbgs() << "\t   ZLH  = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
    }
  });
}
    SUnit *PredSU = IE.getSrc();
    if (S && S->count(PredSU) == 0)
      continue;
    if (IE.ignoreDependence(true))
      continue;

    SUnit *SuccSU = OE.getDst();
    if (!OE.isAntiDep())
      continue;
    if (S && S->count(SuccSU) == 0)
      continue;

  return !Preds.empty();

    SUnit *SuccSU = OE.getDst();
    if (S && S->count(SuccSU) == 0)
      continue;
    if (OE.ignoreDependence(false))
      continue;

    SUnit *PredSU = IE.getSrc();
    if (!IE.isAntiDep())
      continue;
    if (S && S->count(PredSU) == 0)
      continue;

  return !Succs.empty();

  if (!Visited.insert(Cur).second)
    return Path.contains(Cur);
  bool FoundPath = false;
    if (!OE.ignoreDependence(false))
      FoundPath |=
          computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
    if (IE.isAntiDep() && IE.getDistance() == 0)
      FoundPath |=
          computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
  for (SUnit *SU : NS) {
    if (Reg.isVirtual())
    else if (MRI.isAllocatable(Reg))
      for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))

  for (SUnit *SU : NS)
      if (Reg.isVirtual()) {
      } else if (MRI.isAllocatable(Reg)) {
        for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
  for (auto &NS : NodeSets) {
    IntervalPressure RecRegPressure;
    RegPressureTracker RecRPTracker(RecRegPressure);
    RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
    RecRPTracker.closeBottom();

    std::vector<SUnit *> SUnits(NS.begin(), NS.end());
    llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
      return A->NodeNum > B->NodeNum;
    });

    for (auto &SU : SUnits) {
      RecRPTracker.setPos(std::next(CurInstI));

      RegPressureDelta RPDelta;
      RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,

        dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
        NS.setExceedPressure(SU);

      RecRPTracker.recede();
    }
  }
}
void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
  unsigned Colocate = 0;
  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
    SmallSetVector<SUnit *, 8> S1;
    for (int j = i + 1; j < e; ++j) {
      SmallSetVector<SUnit *, 8> S2;

void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
  for (auto &NS : NodeSets) {
    if (NS.getRecMII() > 2)
      return;
    if (NS.getMaxDepth() > MII)
      return;
void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
  SetVector<SUnit *> NodesAdded;
  SmallPtrSet<SUnit *, 8> Visited;

  for (NodeSet &I : NodeSets) {
    SmallSetVector<SUnit *, 8> N;
      SetVector<SUnit *> Path;
      for (SUnit *NI : N) {
        computePath(NI, Path, NodesAdded, I, Visited, DDG.get());
      }

    if (succ_L(NodesAdded, N, DDG.get())) {
      SetVector<SUnit *> Path;
      for (SUnit *NI : N) {
        computePath(NI, Path, I, NodesAdded, Visited, DDG.get());
      }
    }

    SmallSetVector<SUnit *, 8> N;
    if (succ_L(NodesAdded, N, DDG.get()))
        addConnectedNodes(I, NewSet, NodesAdded);
      if (!NewSet.empty())
        NodeSets.push_back(NewSet);

    if (pred_L(NodesAdded, N, DDG.get()))
        addConnectedNodes(I, NewSet, NodesAdded);
      if (!NewSet.empty())
        NodeSets.push_back(NewSet);

  for (SUnit &SU : SUnits) {
    if (NodesAdded.count(&SU) == 0) {
      addConnectedNodes(&SU, NewSet, NodesAdded);
      if (!NewSet.empty())
        NodeSets.push_back(NewSet);
    }
  }
}
void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
                                          SetVector<SUnit *> &NodesAdded) {
  for (auto &OE : DDG->getOutEdges(SU)) {
    if (!OE.isArtificial() && !Successor->isBoundaryNode() &&
      addConnectedNodes(Successor, NewSet, NodesAdded);
  }
  for (auto &IE : DDG->getInEdges(SU)) {
    SUnit *Predecessor = IE.getSrc();
    if (!IE.isArtificial() && NodesAdded.count(Predecessor) == 0)
      addConnectedNodes(Predecessor, NewSet, NodesAdded);
  }
}
  for (SUnit *SU : Set1) {
    if (Set2.count(SU) != 0)
  }
  return !Result.empty();

void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
    for (NodeSetType::iterator J = I + 1; J != E;) {
      for (SUnit *SU : *J)

void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
    for (NodeSetType::iterator J = I + 1; J != E;) {
      J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
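/// Compute the node order as described in the SMS paper. The node sets are
/// processed in order of decreasing priority, alternating between bottom-up
/// and top-down passes that pick the node with the maximum height or depth
/// (breaking ties with zero-latency height/depth and then mobility).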
void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
  SmallSetVector<SUnit *, 8> R;

  for (auto &Nodes : NodeSets) {
    SmallSetVector<SUnit *, 8> N;

    } else if (NodeSets.size() == 1) {
      for (const auto &N : Nodes)
        if (N->Succs.size() == 0)
          R.insert(N);

      SUnit *maxASAP = nullptr;
      for (SUnit *SU : Nodes) {
        if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
            (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
          maxASAP = SU;
      }

    while (!R.empty()) {
      if (Order == TopDown) {
        while (!R.empty()) {
          SUnit *maxHeight = nullptr;
          for (SUnit *I : R) {
            if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
              maxHeight = I;
            else if (getHeight(I) == getHeight(maxHeight) &&
                     getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
              maxHeight = I;
            else if (getHeight(I) == getHeight(maxHeight) &&
                     getZeroLatencyHeight(I) ==
                         getZeroLatencyHeight(maxHeight) &&
                     getMOV(I) < getMOV(maxHeight))
              maxHeight = I;
          }
          R.remove(maxHeight);
          for (const auto &OE : DDG->getOutEdges(maxHeight)) {
            SUnit *SU = OE.getDst();
            if (Nodes.count(SU) == 0)
              continue;
            if (OE.ignoreDependence(false))
              continue;
          }

          for (const auto &IE : DDG->getInEdges(maxHeight)) {
            SUnit *SU = IE.getSrc();
            if (!IE.isAntiDep())
              continue;
            if (Nodes.count(SU) == 0)
              continue;
          }
        }

        SmallSetVector<SUnit *, 8> N;
      } else {
        while (!R.empty()) {
          SUnit *maxDepth = nullptr;
          for (SUnit *I : R) {
            if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
              maxDepth = I;
            else if (getDepth(I) == getDepth(maxDepth) &&
                     getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
              maxDepth = I;
            else if (getDepth(I) == getDepth(maxDepth) &&
                     getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
                     getMOV(I) < getMOV(maxDepth))
              maxDepth = I;
          }

          if (Nodes.isExceedSU(maxDepth)) {
            R.insert(Nodes.getNode(0));
            break;
          }

          for (const auto &IE : DDG->getInEdges(maxDepth)) {
            SUnit *SU = IE.getSrc();
            if (Nodes.count(SU) == 0)
              continue;
          }

          for (const auto &OE : DDG->getOutEdges(maxDepth)) {
            SUnit *SU = OE.getDst();
            if (!OE.isAntiDep())
              continue;
            if (Nodes.count(SU) == 0)
              continue;
          }
        }

        SmallSetVector<SUnit *, 8> N;
      }
    }
  }

  LLVM_DEBUG({
    dbgs() << "Node order: ";
    for (SUnit *I : NodeOrder)
      dbgs() << " " << I->NodeNum << " ";
  });
}
bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
  bool scheduleFound = false;
  std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
        std::make_unique<HighRegisterPressureDetector>(Loop.getHeader(), MF);
    HRPDetector->init(RegClassInfo);

  for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
      int EarlyStart = INT_MIN;
      int LateStart = INT_MAX;

      LLVM_DEBUG(dbgs() << format("\tes: %8x ls: %8x\n", EarlyStart, LateStart));

      if (EarlyStart > LateStart)
        scheduleFound = false;
      else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
        scheduleFound =
            Schedule.insert(SU, EarlyStart, EarlyStart + (int)II - 1, II);
      else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
        scheduleFound =
            Schedule.insert(SU, LateStart, LateStart - (int)II + 1, II);
      else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
        LateStart = std::min(LateStart, EarlyStart + (int)II - 1);
          scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II);
          scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II);
      } else {
        scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
                                        FirstCycle + getASAP(SU) + II - 1, II);
      }

        scheduleFound = false;

        dbgs() << "\tCan't schedule\n";
    } while (++NI != NE && scheduleFound);

      scheduleFound = DDG->isValidSchedule(Schedule);
  }

  if (scheduleFound) {
    scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule);
  }

  if (scheduleFound) {
    Pass.ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(
                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "Schedule found with Initiation Interval: "
             << ", MaxStageCount: "
  if (!Reg.isVirtual())
  if (MRI.getVRegDef(Reg)->getParent() != MI.getParent())

  if (!Op.isReg() || !Op.getReg().isVirtual())

    if (Def->getParent() != LoopBB)

    if (Def->isCopy()) {
      if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
      CurReg = Def->getOperand(1).getReg();
    } else if (Def->isPHI()) {
    } else if (TII->getIncrementValue(*Def, Value)) {
      bool OffsetIsScalable;
      if (TII->getMemOperandWithOffset(*Def, BaseOp, Offset, OffsetIsScalable,
        CurReg = BaseOp->getReg();

    if (CurReg == OrgReg)
bool SwingSchedulerDAG::computeDelta(const MachineInstr &MI,
                                     int &Delta) const {
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const MachineOperand *BaseOp;
  bool OffsetIsScalable;
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
    return false;

  if (OffsetIsScalable)
    return false;

  if (!BaseOp->isReg())
    return false;
bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
                                              unsigned &BasePos,
                                              unsigned &OffsetPos,
  unsigned BasePosLd, OffsetPosLd;

  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
  if (!Phi || !Phi->isPHI())
    return false;

  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
  if (!PrevDef || PrevDef == MI)
    return false;

  unsigned BasePos1 = 0, OffsetPos1 = 0;

  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
  MF.deleteMachineInstr(NewMI);

  BasePos = BasePosLd;
  OffsetPos = OffsetPosLd;

      InstrChanges.find(SU);
  if (It != InstrChanges.end()) {
    std::pair<Register, int64_t> RegAndOffset = It->second;
    unsigned BasePos, OffsetPos;
    if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
      return;
    Register BaseReg = MI->getOperand(BasePos).getReg();
    if (BaseStageNum < DefStageNum) {
      int OffsetDiff = DefStageNum - BaseStageNum;
      if (DefCycleNum < BaseCycleNum) {
          MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;

  while (Def->isPHI()) {
    if (!Visited.insert(Def).second)
      break;
    for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
      if (Def->getOperand(i + 1).getMBB() == BB) {
        Def = MRI.getVRegDef(Def->getOperand(i).getReg());
      }
  }
  int DeltaB, DeltaO, Delta;

  int64_t OffsetB, OffsetO;
  bool OffsetBIsScalable, OffsetOIsScalable;
  if (!TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,
                                    OffsetBIsScalable, TRI) ||
      !TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,
                                    OffsetOIsScalable, TRI))
    return true;

  if (OffsetBIsScalable || OffsetOIsScalable)
    return true;

  if (!RegB.isVirtual() || !RegO.isVirtual())
    return true;

  if (!DefB || !DefO || !DefB->isPHI() || !DefO->isPHI())
    return true;

  LLVM_DEBUG({
    dbgs() << "Overlap check:\n";
    dbgs() << "  BaseMI: ";
    dbgs() << "    Base + " << OffsetB << " + I * " << Delta
           << ", Len: " << AccessSizeB.getValue() << "\n";
    dbgs() << "  OtherMI: ";
    dbgs() << "    Base + " << OffsetO << " + I * " << Delta
           << ", Len: " << AccessSizeO.getValue() << "\n";
  });

  int64_t BaseMinAddr = OffsetB;
  int64_t OtherNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue() - 1;
  if (BaseMinAddr > OtherNextIterMaxAddr) {

  int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue() - 1;
  int64_t OtherNextIterMinAddr = OffsetO + Delta;
  if (BaseMaxAddr < OtherNextIterMinAddr) {

  if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() ||
      Edge.getDst()->isBoundaryNode())
    return false;

  if (Edge.isOutputDep())

  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
void SwingSchedulerDAG::postProcessDAG() {
  for (auto &M : Mutations)

  bool forward = true;
  LLVM_DEBUG({
    dbgs() << "Trying to insert node between " << StartCycle << " and "
           << EndCycle << " II: " << II << "\n";
  });
  if (StartCycle > EndCycle)
    forward = false;

  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
  for (int curCycle = StartCycle; curCycle != termCycle;
       forward ? ++curCycle : --curCycle) {
        ProcItinResources.canReserveResources(*SU, curCycle)) {
      LLVM_DEBUG({
        dbgs() << "\tinsert at cycle " << curCycle << " ";
      });

        ProcItinResources.reserveResources(*SU, curCycle);
      ScheduledInstrs[curCycle].push_back(SU);
      InstrToCycle.insert(std::make_pair(SU, curCycle));
      if (curCycle > LastCycle)
        LastCycle = curCycle;
      if (curCycle < FirstCycle)
        FirstCycle = curCycle;
      return true;
    }
    LLVM_DEBUG({
      dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
    });
  }
  int EarlyCycle = INT_MAX;
  while (!Worklist.empty()) {
    if (Visited.count(PrevSU))
      continue;
    std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
    if (it == InstrToCycle.end())
      continue;
    EarlyCycle = std::min(EarlyCycle, it->second);
    for (const auto &IE : DDG->getInEdges(PrevSU))
      if (IE.isOrderDep() || IE.isOutputDep())

  int LateCycle = INT_MIN;
  while (!Worklist.empty()) {
    std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
    if (it == InstrToCycle.end())
      continue;
    LateCycle = std::max(LateCycle, it->second);
      if (OE.isOrderDep() || OE.isOutputDep())

  for (auto &P : SU->Preds)
    if (P.getKind() == SDep::Anti && P.getSUnit()->getInstr()->isPHI())
      for (auto &S : P.getSUnit()->Succs)
        if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
          return P.getSUnit();
  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
      if (IE.getSrc() == I) {
        *MinLateStart = std::min(*MinLateStart, End);

        int EarlyStart = cycle + IE.getLatency() - IE.getDistance() * II;
        *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
      }

      if (OE.getDst() == I) {
        *MaxEarlyStart = std::max(*MaxEarlyStart, Start);

        int LateStart = cycle - OE.getLatency() + OE.getDistance() * II;
        *MinLateStart = std::min(*MinLateStart, LateStart);
      }

    for (const auto &Dep : SU->Preds) {
      if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
        *MinLateStart = std::min(*MinLateStart, cycle);
    }
  }
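/// Order the instructions that are placed in the same cycle so that defs are
/// ordered before their uses and order dependences within the cycle are
/// respected.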
                                  std::deque<SUnit *> &Insts) const {
  bool OrderBeforeUse = false;
  bool OrderAfterDef = false;
  bool OrderBeforeDef = false;
  unsigned MoveDef = 0;
  unsigned MoveUse = 0;

  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
    if (!MO.isReg() || !MO.getReg().isVirtual())
      continue;

      unsigned BasePos, OffsetPos;
      if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
        if (MI->getOperand(BasePos).getReg() == Reg)

      std::tie(Reads, Writes) =
          (*I)->getInstr()->readsWritesVirtualRegister(Reg);

        OrderBeforeUse = true;
        OrderAfterDef = true;
      } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
          OrderBeforeUse = true;
          OrderAfterDef = true;
          OrderBeforeUse = true;
          OrderAfterDef = true;
        OrderBeforeUse = true;
        OrderBeforeDef = true;

      if (OE.getDst() != *I)
        continue;
        OrderBeforeUse = true;
      else if ((OE.isAntiDep() || OE.isOutputDep()) &&
        OrderBeforeUse = true;
        if ((MoveUse == 0) || (Pos < MoveUse))

      if (IE.getSrc() != *I)
        continue;
      if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
        OrderAfterDef = true;

  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
    OrderBeforeUse = false;

  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);

  if (OrderBeforeUse && OrderAfterDef) {
    SUnit *UseSU = Insts.at(MoveUse);
    SUnit *DefSU = Insts.at(MoveDef);
    if (MoveUse > MoveDef) {
      Insts.erase(Insts.begin() + MoveUse);
      Insts.erase(Insts.begin() + MoveDef);
    } else {
      Insts.erase(Insts.begin() + MoveDef);
      Insts.erase(Insts.begin() + MoveUse);
    }
  }

    Insts.push_front(SU);
    Insts.push_back(SU);
  assert(Phi.isPHI() && "Expecting a Phi.");

  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);

  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);

  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
    if (DMO.getReg() == LoopReg)

    if (InstrToCycle.count(IE.getSrc()))

  for (auto &SU : SSD->SUnits)
  while (!Worklist.empty()) {
    if (DoNotPipeline.count(SU))
      continue;
    DoNotPipeline.insert(SU);
      if (OE.getDistance() == 1)
  }
  return DoNotPipeline;

  int NewLastCycle = INT_MIN;
      NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
      if (IE.getDistance() == 0)
        NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
      if (OE.getDistance() == 1)
        NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);

    int OldCycle = InstrToCycle[&SU];
    if (OldCycle != NewCycle) {
      InstrToCycle[&SU] = NewCycle;
      LLVM_DEBUG(dbgs() << ") is not pipelined; moving from cycle " << OldCycle
                        << " to " << NewCycle << " Instr:" << *SU.getInstr());
    }

    if (FirstCycle + InitiationInterval <= NewCycle)
      return false;

    NewLastCycle = std::max(NewLastCycle, NewCycle);
  LastCycle = NewLastCycle;

    int CycleDef = InstrToCycle[&SU];
    assert(StageDef != -1 && "Instruction should have been scheduled.");
      SUnit *Dst = OE.getDst();
      if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
        if (OE.getReg().isPhysical()) {
          if (InstrToCycle[Dst] <= CycleDef)
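/// Check that the computed node order satisfies the property that, for each
/// node, either a predecessor or a successor appears earlier in the order
/// (nodes belonging to recurrences are exempt); otherwise the order is
/// reported as invalid.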
void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
  typedef std::pair<SUnit *, unsigned> UnitIndex;
  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));

  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
    Indices.push_back(std::make_pair(NodeOrder[i], i));

  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
    return std::get<0>(i1) < std::get<0>(i2);
  };

  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
    bool PredBefore = false;
    bool SuccBefore = false;

    for (const auto &IE : DDG->getInEdges(SU)) {
      SUnit *PredSU = IE.getSrc();
      unsigned PredIndex = std::get<1>(
    }

    for (const auto &OE : DDG->getOutEdges(SU)) {
      SUnit *SuccSU = OE.getDst();
      unsigned SuccIndex = std::get<1>(
    }

        Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
      NumNodeOrderIssues++;
      LLVM_DEBUG(dbgs() << " are scheduled before node " << SU->NodeNum);
  }

  LLVM_DEBUG({
      dbgs() << "Invalid node order found!\n";
  });
}
  for (SUnit *SU : Instrs) {
    for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
          InstrChanges.find(SU);
      if (It != InstrChanges.end()) {
        unsigned BasePos, OffsetPos;
        if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
              MI->getOperand(OffsetPos).getImm() - It->second.second;
        }
      }

      unsigned TiedUseIdx = 0;
      if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
        OverlapReg = MI->getOperand(TiedUseIdx).getReg();
        NewBaseReg = MI->getOperand(i).getReg();
      }
    }
  }

                               const std::deque<SUnit *> &Instrs) const {
  std::deque<SUnit *> NewOrderPhi;
  for (SUnit *SU : Instrs) {
      NewOrderPhi.push_back(SU);
  }
  std::deque<SUnit *> NewOrderI;
  for (SUnit *SU : Instrs) {

      std::deque<SUnit *> &cycleInstrs =
          ScheduledInstrs[cycle + (stage * InitiationInterval)];
        ScheduledInstrs[cycle].push_front(SU);

  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
    ScheduledInstrs.erase(cycle);
    std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];

  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
     << " depth " << MaxDepth << " col " << Colocate << "\n";
  for (const auto &I : Nodes)
    os << "   SU(" << I->NodeNum << ") " << *(I->getInstr());

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    for (SUnit *CI : cycleInstrs->second) {
      os << "(" << CI->NodeNum << ") ";
    }
void ResourceManager::dumpMRT() const {
  std::stringstream SS;
  SS << std::setw(4) << "Slot";
  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
    SS << std::setw(3) << I;
  SS << std::setw(7) << "#Mops"
  for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
    SS << std::setw(4) << Slot;
    for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
      SS << std::setw(3) << MRT[Slot][I];
    SS << std::setw(7) << NumScheduledMops[Slot] << "\n";
  }
}
  unsigned ProcResourceID = 0;

  assert(SM.getNumProcResourceKinds() < 64 &&
         "Too many kinds of resources, unsupported");

  Masks.resize(SM.getNumProcResourceKinds());
  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    if (Desc.SubUnitsIdxBegin)
      continue;
    Masks[I] = 1ULL << ProcResourceID;
  }

  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    if (!Desc.SubUnitsIdxBegin)
      continue;
    Masks[I] = 1ULL << ProcResourceID;
    for (unsigned U = 0; U < Desc.NumUnits; ++U)
      Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
  }

  LLVM_DEBUG({
    dbgs() << "ProcResourceDesc:\n";
    for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
      dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
                       ProcResource->Name, I, Masks[I],
    }
    dbgs() << " -----------------\n";
  });
"canReserveResources:\n";
3988 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3994 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
4000 reserveResources(SCDesc,
Cycle);
4001 bool Result = !isOverbooked();
4002 unreserveResources(SCDesc,
Cycle);
4011 dbgs() <<
"reserveResources:\n";
4014 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
4020 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
4026 reserveResources(SCDesc,
Cycle);
4031 dbgs() <<
"reserveResources: done!\n\n";
4042 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
4045 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
4054 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
4057 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
bool ResourceManager::isOverbooked() const {
  for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
    for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
      const MCProcResourceDesc *Desc = SM.getProcResource(I);
      if (MRT[Slot][I] > Desc->NumUnits)
        return true;
    }
    if (NumScheduledMops[Slot] > IssueWidth)
      return true;
  }
  return false;
}
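/// Calculate the resource constrained MII with the DFA-based packetizer:
/// instructions are processed in order of decreasing criticality of the
/// functional units they use, and new DFA resource states are added whenever
/// an instruction cannot be packed into the existing ones.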
int ResourceManager::calculateResMIIDFA() const {
  FuncUnitSorter FUS = FuncUnitSorter(*ST);
  for (SUnit &SU : DAG->SUnits)
    FUS.calcCriticalResources(*SU.getInstr());
  PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>

  for (SUnit &SU : DAG->SUnits)

  while (!FuncUnitOrder.empty()) {
    MachineInstr *MI = FuncUnitOrder.top();
    FuncUnitOrder.pop();
    if (TII->isZeroCost(MI->getOpcode()))
      continue;

    unsigned ReservedCycles = 0;
    auto *RI = Resources.begin();
    auto *RE = Resources.end();
    LLVM_DEBUG({
      dbgs() << "Trying to reserve resource for " << NumCycles
             << " cycles for \n";
    });
    for (unsigned C = 0; C < NumCycles; ++C)
      if ((*RI)->canReserveResources(*MI)) {
        (*RI)->reserveResources(*MI);
      }

                      << ", NumCycles:" << NumCycles << "\n");

    for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
      LLVM_DEBUG(dbgs() << "NewResource created to reserve resources");
      assert(NewResource->canReserveResources(*MI) && "Reserve error.");
      NewResource->reserveResources(*MI);
      Resources.push_back(std::unique_ptr<DFAPacketizer>(NewResource));
    }
  }

  int Resmii = Resources.size();
4146 for (
SUnit &SU : DAG->SUnits) {
4158 <<
" WriteProcRes: ";
4163 make_range(STI->getWriteProcResBegin(SCDesc),
4164 STI->getWriteProcResEnd(SCDesc))) {
4168 SM.getProcResource(PRE.ProcResourceIdx);
4169 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
4172 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
4177 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
4180 dbgs() <<
"#Mops: " << NumMops <<
", "
4181 <<
"IssueWidth: " << IssueWidth <<
", "
4182 <<
"Cycles: " << Result <<
"\n";
4187 std::stringstream SS;
4188 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
4189 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
4194 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
4196 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
4199 std::stringstream SS;
4200 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
4201 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
4202 << std::setw(10) << Cycles <<
"\n";
4206 if (Cycles > Result)
  InitiationInterval = II;
  DFAResources.clear();
  DFAResources.resize(II);
  for (auto &I : DFAResources)
    I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));

  NumScheduledMops.clear();
  NumScheduledMops.resize(II);

  if (Pred.isArtificial() || Dst->isBoundaryNode())
  return IgnoreAnti && (Pred.getKind() == SDep::Kind::Anti || Distance != 0);
SwingSchedulerDDG::SwingSchedulerDDGEdges &
SwingSchedulerDDG::getEdges(const SUnit *SU) {
    return EntrySUEdges;

const SwingSchedulerDDG::SwingSchedulerDDGEdges &
SwingSchedulerDDG::getEdges(const SUnit *SU) const {
    return EntrySUEdges;

void SwingSchedulerDDG::addEdge(const SUnit *SU,
                                const SwingSchedulerDDGEdge &Edge) {
         "Validation-only edges are not expected here.");
  auto &Edges = getEdges(SU);
  if (Edge.getSrc() == SU)
    Edges.Succs.push_back(Edge);
  else
    Edges.Preds.push_back(Edge);
}

void SwingSchedulerDDG::initEdges(SUnit *SU) {
  for (const auto &PI : SU->Preds) {
    SwingSchedulerDDGEdge Edge(SU, PI, false,
  }

  for (const auto &SI : SU->Succs) {
    SwingSchedulerDDGEdge Edge(SU, SI, true,
  }
}

    : EntrySU(EntrySU), ExitSU(ExitSU) {
  EdgesVec.resize(SUnits.size());

  for (auto &SU : SUnits)

  for (SUnit &SU : SUnits) {
      for (SUnit *Dst : *OD) {
        Edge.setDistance(1);
        ValidationOnlyEdges.push_back(Edge);
      }
  }
const SwingSchedulerDDG::EdgesType &
  return getEdges(SU).Preds;

const SwingSchedulerDDG::EdgesType &
  return getEdges(SU).Succs;

  auto ExpandCycle = [&](SUnit *SU) {

    SUnit *Src = Edge.getSrc();
    SUnit *Dst = Edge.getDst();
    if (!Src->isInstr() || !Dst->isInstr())
      continue;
    int CycleSrc = ExpandCycle(Src);
    int CycleDst = ExpandCycle(Dst);
    int MaxLateStart = CycleDst + Edge.getDistance() * II - Edge.getLatency();
    if (CycleSrc > MaxLateStart) {
      LLVM_DEBUG({
        dbgs() << "Validation failed for edge from " << Src->NodeNum << " to "
               << Dst->NodeNum << "\n";
      });
      return false;
    }

  for (SUnit &SU : SUnits) {

      !TII->isGlobalMemoryObject(FromMI) &&

  const auto DumpSU = [](const SUnit *SU) {
    std::ostringstream OSS;
    OSS << "SU(" << SU->NodeNum << ")";
  };

  dbgs() << "  Loop carried edges from " << DumpSU(SU) << "\n"
  for (SUnit *Dst : *Order)
    dbgs() << "    " << DumpSU(Dst) << "\n";
Return true if there is a path from the specified node to any of the nodes in DestNodes.
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, BatchAAResults &BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const SwingSchedulerDAG *SSD)
Returns true if there is a loop-carried order dependency from Src to Dst.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
Promote Memory to Register
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
Remove Loads Into Fake Uses
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
Add loop-carried chain dependencies.
void computeDependencies()
The main function to compute loop-carried order-dependencies.
const BitVector & getLoopCarried(unsigned Idx) const
LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, BatchAAResults *BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
AttributeList getAttributes() const
Return the attribute list for this Function.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isPostIncrement(const MachineInstr &MI) const override
Return true for post-incremented instructions.
DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override
Create machine specific model for scheduling.
bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const override
For instructions with a base and offset, return the position of the base register and offset operands...
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
Represents a single loop in the control flow graph.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PointerIntPair - This class implements a pair of a pointer and small integer.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind
These are the different kinds of scheduling dependencies.
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SmallPtrSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void dump() const override
LLVM_ABI void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
LLVM_ABI bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
typename vector_type::const_iterator iterator
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
void clear()
Completely clear the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
const EdgesType & getInEdges(const SUnit *SU) const
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool enableMachinePipeliner() const
True if the subtarget should run MachinePipeliner.
virtual bool useDFAforSMS() const
Default to DFA for resource management, return false when target will use ProcResource in InstrSchedM...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Wrapper class representing a virtual register or register unit.
constexpr bool isVirtualReg() const
constexpr MCRegUnit asMCRegUnit() const
constexpr Register asVirtualReg() const
The main class in the implementation of the target independent window scheduler.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
@ Valid
The data is already valid.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< DefNode * > Def
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
std::set< NodeId > NodeSet
friend class Instruction
Iterator for Instructions in a `BasicBlock.
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
@ WS_Force
Use window algorithm after SMS algorithm fails.
@ WS_On
Turn off window algorithm.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
cl::opt< bool > SwpEnableCopyToPhi
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
LLVM_ABI char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
@ Increment
Incrementally increasing token ID.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This class holds an SUnit corresponding to a memory operation and other information related to the in...
const Value * MemOpValue
The value of a memory operand.
SmallVector< const Value *, 2 > UnderlyingObjs
bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const
int64_t MemOpOffset
The offset of a memory operand.
bool IsAllIdentified
True if all the underlying objects are identified.
SUnitWithMemInfo(SUnit *SU)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
uint64_t FuncUnits
Bitmask representing a set of functional units.
static constexpr LaneBitmask getNone()
Represents loop-carried dependencies.
SmallSetVector< SUnit *, 8 > OrderDep
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.