72#include "llvm/Config/llvm-config.h"
100#define DEBUG_TYPE "pipeliner"
// Pass-wide statistics (visible via -stats): how many loops the software
// pipeliner attempted and succeeded on, plus one counter per reason a
// candidate loop was rejected (unanalyzable branch, unsupported loop shape,
// missing preheader, MII/stage limits exceeded, no schedule found, etc.).
102STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
103STATISTIC(NumPipelined,
"Number of loops software pipelined");
104STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
105STATISTIC(NumFailBranch,
"Pipeliner abort due to unknown branch");
106STATISTIC(NumFailLoop,
"Pipeliner abort due to unsupported loop");
107STATISTIC(NumFailPreheader,
"Pipeliner abort due to missing preheader");
108STATISTIC(NumFailLargeMaxMII,
"Pipeliner abort due to MaxMII too large");
109STATISTIC(NumFailZeroMII,
"Pipeliner abort due to zero MII");
110STATISTIC(NumFailNoSchedule,
"Pipeliner abort due to no schedule found");
111STATISTIC(NumFailZeroStage,
"Pipeliner abort due to zero stage");
112STATISTIC(NumFailLargeMaxStage,
"Pipeliner abort due to too many stages");
113STATISTIC(NumFailTooManyStores,
"Pipeliner abort due to too many stores");
117 cl::desc(
"Enable Software Pipelining"));
126 cl::desc(
"Size limit for the MII."),
132 cl::desc(
"Force pipeliner to use specified II."),
138 cl::desc(
"Maximum stages allowed in the generated scheduled."),
145 cl::desc(
"Prune dependences between unrelated Phi nodes."),
152 cl::desc(
"Prune loop carried order dependences."),
170 cl::desc(
"Instead of emitting the pipelined code, annotate instructions "
171 "with the generated schedule for feeding into the "
172 "-modulo-schedule-test pass"));
177 "Use the experimental peeling code generator for software pipelining"));
185 cl::desc(
"Limit register pressure of scheduled loop"));
190 cl::desc(
"Margin representing the unused percentage of "
191 "the register pressure limit"));
195 cl::desc(
"Use the MVE code generator for software pipelining"));
200 "pipeliner-max-num-stores",
208 cl::desc(
"Enable CopyToPhi DAG Mutation"));
213 "pipeliner-force-issue-width",
220 cl::desc(
"Set how to use window scheduling algorithm."),
222 "Turn off window algorithm."),
224 "Use window algorithm after SMS algorithm fails."),
226 "Use window algorithm instead of SMS algorithm.")));
// Cap on the number of paths explored while enumerating circuits in the
// dependence graph (compared against NumPaths in Circuits::circuit); bounds
// compile time on loops with dense recurrence structure.
228unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
236 "Modulo Software Pipelining",
false,
false)
278 enum class InstrTag {
287 TaggedSUnit(
SUnit *SU, InstrTag Tag)
290 InstrTag
getTag()
const {
return InstrTag(getInt()); }
294 struct LoadStoreChunk {
298 void append(
SUnit *SU);
303 std::vector<SUnit> &SUnits;
309 std::vector<BitVector> LoopCarried;
322 std::vector<TaggedSUnit> TaggedSUnits;
336 return LoopCarried[Idx];
341 std::optional<InstrTag> getInstrTag(
SUnit *SU)
const;
343 void addLoopCarriedDepenenciesForChunks(
const LoadStoreChunk &From,
344 const LoadStoreChunk &To);
355 bool PerformCheapCheck =
false);
357 void computeDependenciesAux();
388 TII =
MF->getSubtarget().getInstrInfo();
391 for (
const auto &L : *
MLI)
401bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
403 for (
const auto &InnerLoop : L)
404 Changed |= scheduleLoop(*InnerLoop);
416 setPragmaPipelineOptions(L);
417 if (!canPipelineLoop(L)) {
421 L.getStartLoc(), L.getHeader())
422 <<
"Failed to pipeline loop";
425 LI.LoopPipelinerInfo.reset();
430 if (useSwingModuloScheduler())
431 Changed = swingModuloScheduler(L);
433 if (useWindowScheduler(
Changed))
434 Changed = runWindowScheduler(L);
436 LI.LoopPipelinerInfo.reset();
440void MachinePipeliner::setPragmaPipelineOptions(
MachineLoop &L) {
445 MachineBasicBlock *LBLK =
L.getTopBlock();
458 MDNode *LoopID = TI->
getMetadata(LLVMContext::MD_loop);
459 if (LoopID ==
nullptr)
476 if (S->
getString() ==
"llvm.loop.pipeline.initiationinterval") {
478 "Pipeline initiation interval hint metadata should have two operands.");
482 }
else if (S->
getString() ==
"llvm.loop.pipeline.disable") {
495 auto It = PhiDeps.find(
Reg);
496 if (It == PhiDeps.end())
507 for (
unsigned Dep : It->second) {
522 unsigned DefReg =
MI.getOperand(0).getReg();
526 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2)
527 Ins->second.push_back(
MI.getOperand(
I).getReg());
534 for (
const auto &KV : PhiDeps) {
535 unsigned Reg = KV.first;
546bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
547 if (
L.getNumBlocks() != 1) {
549 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
550 L.getStartLoc(),
L.getHeader())
551 <<
"Not a single basic block: "
552 <<
ore::NV(
"NumBlocks",
L.getNumBlocks());
564 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
565 L.getStartLoc(),
L.getHeader())
566 <<
"Disabled by Pragma.";
576 if (
TII->analyzeBranch(*
L.getHeader(),
LI.TBB,
LI.FBB,
LI.BrCond)) {
577 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeBranch, can NOT pipeline Loop\n");
580 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
581 L.getStartLoc(),
L.getHeader())
582 <<
"The branch can't be understood";
587 LI.LoopInductionVar =
nullptr;
588 LI.LoopCompare =
nullptr;
589 LI.LoopPipelinerInfo =
TII->analyzeLoopForPipelining(
L.getTopBlock());
590 if (!
LI.LoopPipelinerInfo) {
591 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeLoop, can NOT pipeline Loop\n");
594 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
595 L.getStartLoc(),
L.getHeader())
596 <<
"The loop structure is not supported";
601 if (!
L.getLoopPreheader()) {
602 LLVM_DEBUG(
dbgs() <<
"Preheader not found, can NOT pipeline Loop\n");
605 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
606 L.getStartLoc(),
L.getHeader())
607 <<
"No loop preheader found";
612 unsigned NumStores = 0;
613 for (MachineInstr &
MI : *
L.getHeader())
618 NumFailTooManyStores++;
620 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
621 L.getStartLoc(),
L.getHeader())
622 <<
"Too many store instructions in the loop: "
623 <<
ore::NV(
"NumStores", NumStores) <<
" > "
630 preprocessPhiNodes(*
L.getHeader());
635 MachineRegisterInfo &
MRI =
MF->getRegInfo();
639 for (MachineInstr &PI :
B.phis()) {
640 MachineOperand &DefOp = PI.getOperand(0);
642 auto *RC =
MRI.getRegClass(DefOp.
getReg());
644 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
645 MachineOperand &RegOp = PI.getOperand(i);
652 MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
669bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
670 assert(
L.getBlocks().size() == 1 &&
"SMS works on single blocks only.");
673 SwingSchedulerDAG SMS(
677 MachineBasicBlock *
MBB =
L.getHeader();
695 return SMS.hasNewSchedule();
709bool MachinePipeliner::runWindowScheduler(
MachineLoop &L) {
717 Context.RegClassInfo->runOnMachineFunction(*
MF);
722bool MachinePipeliner::useSwingModuloScheduler() {
727bool MachinePipeliner::useWindowScheduler(
bool Changed) {
734 "llvm.loop.pipeline.initiationinterval is set.\n");
742void SwingSchedulerDAG::setMII(
unsigned ResMII,
unsigned RecMII) {
745 else if (II_setByPragma > 0)
746 MII = II_setByPragma;
748 MII = std::max(ResMII, RecMII);
751void SwingSchedulerDAG::setMAX_II() {
754 else if (II_setByPragma > 0)
755 MAX_II = II_setByPragma;
765 updatePhiDependences();
766 Topo.InitDAGTopologicalSorting();
772 dbgs() <<
"===== Loop Carried Edges Begin =====\n";
775 dbgs() <<
"===== Loop Carried Edges End =====\n";
778 NodeSetType NodeSets;
779 findCircuits(NodeSets);
780 NodeSetType Circuits = NodeSets;
783 unsigned ResMII = calculateResMII();
784 unsigned RecMII = calculateRecMII(NodeSets);
792 setMII(ResMII, RecMII);
796 <<
" (rec=" << RecMII <<
", res=" << ResMII <<
")\n");
802 Pass.ORE->emit([&]() {
804 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
805 <<
"Invalid Minimal Initiation Interval: 0";
813 <<
", we don't pipeline large loops\n");
814 NumFailLargeMaxMII++;
815 Pass.ORE->emit([&]() {
817 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
818 <<
"Minimal Initiation Interval too large: "
819 <<
ore::NV(
"MII", (
int)MII) <<
" > "
821 <<
"Refer to -pipeliner-max-mii.";
826 computeNodeFunctions(NodeSets);
828 registerPressureFilter(NodeSets);
830 colocateNodeSets(NodeSets);
832 checkNodeSets(NodeSets);
835 for (
auto &
I : NodeSets) {
836 dbgs() <<
" Rec NodeSet ";
843 groupRemainingNodes(NodeSets);
845 removeDuplicateNodes(NodeSets);
848 for (
auto &
I : NodeSets) {
849 dbgs() <<
" NodeSet ";
854 computeNodeOrder(NodeSets);
857 checkValidNodeOrder(Circuits);
860 Scheduled = schedulePipeline(Schedule);
865 Pass.ORE->emit([&]() {
867 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
868 <<
"Unable to find schedule";
875 if (numStages == 0) {
878 Pass.ORE->emit([&]() {
880 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
881 <<
"No need to pipeline - no overlapped iterations in schedule.";
888 <<
" : too many stages, abort\n");
889 NumFailLargeMaxStage++;
890 Pass.ORE->emit([&]() {
892 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
893 <<
"Too many stages in schedule: "
894 <<
ore::NV(
"numStages", (
int)numStages) <<
" > "
896 <<
". Refer to -pipeliner-max-stages.";
901 Pass.ORE->emit([&]() {
904 <<
"Pipelined succesfully!";
909 std::vector<MachineInstr *> OrderedInsts;
913 OrderedInsts.push_back(SU->getInstr());
914 Cycles[SU->getInstr()] =
Cycle;
919 for (
auto &KV : NewMIs) {
920 Cycles[KV.first] = Cycles[KV.second];
921 Stages[KV.first] = Stages[KV.second];
922 NewInstrChanges[KV.first] = InstrChanges[
getSUnit(KV.first)];
929 "Cannot serialize a schedule with InstrChanges!");
939 LoopPipelinerInfo->isMVEExpanderSupported() &&
953 for (
auto &KV : NewMIs)
954 MF.deleteMachineInstr(KV.second);
965 assert(Phi.isPHI() &&
"Expecting a Phi.");
969 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
970 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
971 InitVal = Phi.getOperand(i).getReg();
973 LoopVal = Phi.getOperand(i).getReg();
975 assert(InitVal && LoopVal &&
"Unexpected Phi structure.");
981 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
982 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
983 return Phi.getOperand(i).getReg();
992 while (!Worklist.
empty()) {
994 for (
const auto &
SI : SU->
Succs) {
997 if (Visited.
count(SuccSU))
1010 if (!getUnderlyingObjects())
1035bool SUnitWithMemInfo::getUnderlyingObjects() {
1037 if (!
MI->hasOneMemOperand())
1059 if (Src.isTriviallyDisjoint(Dst))
1066 if (PerformCheapCheck) {
1073 int64_t Offset1, Offset2;
1074 bool Offset1IsScalable, Offset2IsScalable;
1075 if (
TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable,
1077 TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable,
1080 Offset1IsScalable == Offset2IsScalable &&
1081 (
int)Offset1 < (
int)Offset2) {
1082 assert(
TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) &&
1083 "What happened to the chain edge?");
1095 if (Src.isUnknown() || Dst.isUnknown())
1097 if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset)
1108 for (
const Value *SrcObj : Src.UnderlyingObjs)
1109 for (
const Value *DstObj : Dst.UnderlyingObjs)
1117void LoopCarriedOrderDepsTracker::LoadStoreChunk::append(SUnit *SU) {
1119 if (!
MI->mayLoadOrStore())
1121 (
MI->mayStore() ? Stores : Loads).emplace_back(SU);
1127 : DAG(SSD), BAA(BAA), SUnits(DAG->SUnits), N(SUnits.
size()),
1128 LoopCarried(N,
BitVector(N)), TII(TII), TRI(TRI) {}
1132 for (
auto &SU : SUnits) {
1133 auto Tagged = getInstrTag(&SU);
1138 TaggedSUnits.emplace_back(&SU, *Tagged);
1141 computeDependenciesAux();
1144std::optional<LoopCarriedOrderDepsTracker::InstrTag>
1145LoopCarriedOrderDepsTracker::getInstrTag(
SUnit *SU)
const {
1147 if (
TII->isGlobalMemoryObject(
MI))
1148 return InstrTag::Barrier;
1150 if (
MI->mayStore() ||
1151 (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad()))
1152 return InstrTag::LoadOrStore;
1154 if (
MI->mayRaiseFPException())
1155 return InstrTag::FPExceptions;
1157 return std::nullopt;
1160void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
1161 const SUnitWithMemInfo &Src,
const SUnitWithMemInfo &Dst,
1162 bool PerformCheapCheck) {
1164 if (Src.SU == Dst.SU)
1168 LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
1171void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
1172 const LoadStoreChunk &From,
const LoadStoreChunk &To) {
1174 for (
const SUnitWithMemInfo &Src : From.Loads)
1175 for (
const SUnitWithMemInfo &Dst : To.Stores)
1177 addDependenciesBetweenSUs(Src, Dst, Src.SU->NodeNum < Dst.SU->NodeNum);
1180 for (
const SUnitWithMemInfo &Src : From.Stores)
1181 for (
const SUnitWithMemInfo &Dst : To.Loads)
1182 addDependenciesBetweenSUs(Src, Dst);
1185 for (
const SUnitWithMemInfo &Src : From.Stores)
1186 for (
const SUnitWithMemInfo &Dst : To.Stores)
1187 addDependenciesBetweenSUs(Src, Dst);
1190void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
1192 for (
const auto &TSU : TaggedSUnits) {
1193 InstrTag
Tag = TSU.getTag();
1194 SUnit *SU = TSU.getPointer();
1196 case InstrTag::Barrier:
1197 Chunks.emplace_back();
1199 case InstrTag::LoadOrStore:
1200 Chunks.back().append(SU);
1202 case InstrTag::FPExceptions:
1211 for (
const LoadStoreChunk &Chunk : Chunks)
1212 addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
1224LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() {
1225 LoopCarriedEdges LCE;
1229 LCODTracker.computeDependencies();
1230 for (
unsigned I = 0;
I != SUnits.size();
I++)
1231 for (
const int Succ : LCODTracker.getLoopCarried(
I).set_bits())
1244void SwingSchedulerDAG::updatePhiDependences() {
1246 const TargetSubtargetInfo &
ST = MF.getSubtarget<TargetSubtargetInfo>();
1249 for (SUnit &
I : SUnits) {
1254 MachineInstr *
MI =
I.getInstr();
1256 for (
const MachineOperand &MO :
MI->operands()) {
1263 UI =
MRI.use_instr_begin(
Reg),
1264 UE =
MRI.use_instr_end();
1266 MachineInstr *
UseMI = &*UI;
1267 SUnit *SU = getSUnit(
UseMI);
1293 }
else if (MO.isUse()) {
1296 if (
DefMI ==
nullptr)
1298 SUnit *SU = getSUnit(
DefMI);
1303 ST.adjustSchedDependency(SU, 0, &
I, MO.getOperandNo(), Dep,
1310 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
1319 for (
auto &PI :
I.Preds) {
1320 MachineInstr *PMI = PI.getSUnit()->getInstr();
1322 if (
I.getInstr()->isPHI()) {
1331 for (
const SDep &
D : RemoveDeps)
1338void SwingSchedulerDAG::changeDependences() {
1342 for (SUnit &
I : SUnits) {
1343 unsigned BasePos = 0, OffsetPos = 0;
1345 int64_t NewOffset = 0;
1346 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
1351 Register OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
1352 MachineInstr *
DefMI =
MRI.getUniqueVRegDef(OrigBase);
1355 SUnit *DefSU = getSUnit(
DefMI);
1359 MachineInstr *LastMI =
MRI.getUniqueVRegDef(NewBase);
1362 SUnit *LastSU = getSUnit(LastMI);
1366 if (Topo.IsReachable(&
I, LastSU))
1371 for (
const SDep &
P :
I.Preds)
1372 if (
P.getSUnit() == DefSU)
1374 for (
const SDep &
D : Deps) {
1375 Topo.RemovePred(&
I,
D.getSUnit());
1380 for (
auto &
P : LastSU->
Preds)
1383 for (
const SDep &
D : Deps) {
1384 Topo.RemovePred(LastSU,
D.getSUnit());
1391 Topo.AddPred(LastSU, &
I);
1396 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
1407 std::vector<MachineInstr *> &OrderedInsts,
1415 Stage <= LastStage; ++Stage) {
1418 Instrs[
Cycle].push_front(SU);
1425 std::deque<SUnit *> &CycleInstrs = Instrs[
Cycle];
1427 for (
SUnit *SU : CycleInstrs) {
1429 OrderedInsts.push_back(
MI);
1439struct FuncUnitSorter {
1440 const InstrItineraryData *InstrItins;
1441 const MCSubtargetInfo *STI;
1442 DenseMap<InstrStage::FuncUnits, unsigned> Resources;
1444 FuncUnitSorter(
const TargetSubtargetInfo &TSI)
1445 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1450 unsigned minFuncUnits(
const MachineInstr *Inst,
1453 unsigned min = UINT_MAX;
1454 if (InstrItins && !InstrItins->
isEmpty()) {
1455 for (
const InstrStage &IS :
1457 InstrItins->
endStage(SchedClass))) {
1460 if (numAlternatives < min) {
1461 min = numAlternatives;
1468 const MCSchedClassDesc *SCDesc =
1475 for (
const MCWriteProcResEntry &PRE :
1478 if (!PRE.ReleaseAtCycle)
1480 const MCProcResourceDesc *ProcResource =
1482 unsigned NumUnits = ProcResource->
NumUnits;
1483 if (NumUnits < min) {
1485 F = PRE.ProcResourceIdx;
1490 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1498 void calcCriticalResources(MachineInstr &
MI) {
1499 unsigned SchedClass =
MI.getDesc().getSchedClass();
1500 if (InstrItins && !InstrItins->
isEmpty()) {
1501 for (
const InstrStage &IS :
1503 InstrItins->
endStage(SchedClass))) {
1506 Resources[FuncUnits]++;
1511 const MCSchedClassDesc *SCDesc =
1518 for (
const MCWriteProcResEntry &PRE :
1521 if (!PRE.ReleaseAtCycle)
1523 Resources[PRE.ProcResourceIdx]++;
1527 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1531 bool operator()(
const MachineInstr *IS1,
const MachineInstr *IS2)
const {
1533 unsigned MFUs1 = minFuncUnits(IS1, F1);
1534 unsigned MFUs2 = minFuncUnits(IS2, F2);
1537 return MFUs1 > MFUs2;
1542class HighRegisterPressureDetector {
1543 MachineBasicBlock *OrigMBB;
1544 const MachineRegisterInfo &
MRI;
1545 const TargetRegisterInfo *
TRI;
1547 const unsigned PSetNum;
1553 std::vector<unsigned> InitSetPressure;
1557 std::vector<unsigned> PressureSetLimit;
1559 DenseMap<MachineInstr *, RegisterOperands> ROMap;
1561 using Instr2LastUsesTy = DenseMap<MachineInstr *, SmallDenseSet<Register, 4>>;
1564 using OrderedInstsTy = std::vector<MachineInstr *>;
1565 using Instr2StageTy = DenseMap<MachineInstr *, unsigned>;
1568 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1569 if (Pressures.size() == 0) {
1573 for (
unsigned P : Pressures) {
1584 VirtRegOrUnit VRegOrUnit =
1586 : VirtRegOrUnit(static_cast<MCRegUnit>(
Reg.id()));
1587 for (
auto PSetIter =
MRI.getPressureSets(VRegOrUnit); PSetIter.isValid();
1589 dbgs() << *PSetIter <<
' ';
1594 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1597 VirtRegOrUnit VRegOrUnit =
1599 : VirtRegOrUnit(static_cast<MCRegUnit>(
Reg.id()));
1600 auto PSetIter =
MRI.getPressureSets(VRegOrUnit);
1601 unsigned Weight = PSetIter.getWeight();
1602 for (; PSetIter.isValid(); ++PSetIter)
1603 Pressure[*PSetIter] += Weight;
1606 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1608 auto PSetIter =
MRI.getPressureSets(VirtRegOrUnit(
Reg));
1609 unsigned Weight = PSetIter.getWeight();
1610 for (; PSetIter.isValid(); ++PSetIter) {
1611 auto &
P = Pressure[*PSetIter];
1613 "register pressure must be greater than or equal weight");
1635 void computeLiveIn() {
1636 DenseSet<Register>
Used;
1637 for (
auto &
MI : *OrigMBB) {
1638 if (
MI.isDebugInstr())
1640 for (
auto &Use : ROMap[&
MI].
Uses) {
1643 Use.VRegOrUnit.isVirtualReg()
1644 ?
Use.VRegOrUnit.asVirtualReg()
1645 :
Register(
static_cast<unsigned>(
Use.VRegOrUnit.asMCRegUnit()));
1650 if (isReservedRegister(
Reg))
1652 if (isDefinedInThisLoop(
Reg))
1658 for (
auto LiveIn : Used)
1659 increaseRegisterPressure(InitSetPressure, LiveIn);
1663 void computePressureSetLimit(
const RegisterClassInfo &RCI) {
1664 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1679 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1680 Instr2StageTy &Stages)
const {
1685 DenseSet<Register> TargetRegs;
1686 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1687 if (isDefinedInThisLoop(
Reg))
1690 for (MachineInstr *
MI : OrderedInsts) {
1693 UpdateTargetRegs(
Reg);
1695 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses) {
1698 ?
Use.VRegOrUnit.asVirtualReg()
1700 Use.VRegOrUnit.asMCRegUnit()));
1701 UpdateTargetRegs(
Reg);
1706 const auto InstrScore = [&Stages](MachineInstr *
MI) {
1707 return Stages[
MI] +
MI->isPHI();
1710 DenseMap<Register, MachineInstr *> LastUseMI;
1712 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses) {
1715 Use.VRegOrUnit.isVirtualReg()
1716 ?
Use.VRegOrUnit.asVirtualReg()
1717 :
Register(
static_cast<unsigned>(
Use.VRegOrUnit.asMCRegUnit()));
1722 MachineInstr *Orig = Ite->second;
1723 MachineInstr *
New =
MI;
1724 if (InstrScore(Orig) < InstrScore(New))
1730 Instr2LastUsesTy LastUses;
1731 for (
auto [
Reg,
MI] : LastUseMI)
1732 LastUses[
MI].insert(
Reg);
1748 std::vector<unsigned>
1749 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1750 Instr2StageTy &Stages,
1751 const unsigned StageCount)
const {
1752 using RegSetTy = SmallDenseSet<Register, 16>;
1758 auto CurSetPressure = InitSetPressure;
1759 auto MaxSetPressure = InitSetPressure;
1760 auto LastUses = computeLastUses(OrderedInsts, Stages);
1763 dbgs() <<
"Ordered instructions:\n";
1764 for (MachineInstr *
MI : OrderedInsts) {
1765 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1770 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1771 VirtRegOrUnit VRegOrUnit) {
1785 increaseRegisterPressure(CurSetPressure,
Reg);
1789 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1795 if (!RegSet.contains(
Reg))
1800 decreaseRegisterPressure(CurSetPressure,
Reg);
1804 for (
unsigned I = 0;
I < StageCount;
I++) {
1805 for (MachineInstr *
MI : OrderedInsts) {
1806 const auto Stage = Stages[
MI];
1810 const unsigned Iter =
I - Stage;
1812 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1813 InsertReg(LiveRegSets[Iter],
Def.VRegOrUnit);
1815 for (
auto LastUse : LastUses[
MI]) {
1818 EraseReg(LiveRegSets[Iter - 1], LastUse);
1820 EraseReg(LiveRegSets[Iter], LastUse);
1824 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1825 MaxSetPressure[PSet] =
1826 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1829 dbgs() <<
"CurSetPressure=";
1830 dumpRegisterPressures(CurSetPressure);
1831 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1837 return MaxSetPressure;
1841 HighRegisterPressureDetector(MachineBasicBlock *OrigMBB,
1842 const MachineFunction &MF)
1843 : OrigMBB(OrigMBB),
MRI(MF.getRegInfo()),
1844 TRI(MF.getSubtarget().getRegisterInfo()),
1845 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1846 PressureSetLimit(PSetNum, 0) {}
1850 void init(
const RegisterClassInfo &RCI) {
1851 for (MachineInstr &
MI : *OrigMBB) {
1852 if (
MI.isDebugInstr())
1854 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1858 computePressureSetLimit(RCI);
1863 bool detect(
const SwingSchedulerDAG *SSD, SMSchedule &Schedule,
1864 const unsigned MaxStage)
const {
1866 "the percentage of the margin must be between 0 to 100");
1868 OrderedInstsTy OrderedInsts;
1869 Instr2StageTy Stages;
1871 const auto MaxSetPressure =
1872 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1875 dbgs() <<
"Dump MaxSetPressure:\n";
1876 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1877 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1882 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1883 unsigned Limit = PressureSetLimit[PSet];
1886 <<
" Margin=" << Margin <<
"\n");
1887 if (Limit < MaxSetPressure[PSet] + Margin) {
1890 <<
"Rejected the schedule because of too high register pressure\n");
1906unsigned SwingSchedulerDAG::calculateResMII() {
1908 ResourceManager
RM(&MF.getSubtarget(),
this);
1909 return RM.calculateResMII();
1918unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1919 unsigned RecMII = 0;
1921 for (NodeSet &Nodes : NodeSets) {
1925 unsigned Delay = Nodes.getLatency();
1926 unsigned Distance = 1;
1929 unsigned CurMII = (Delay + Distance - 1) / Distance;
1930 Nodes.setRecMII(CurMII);
1931 if (CurMII > RecMII)
1939void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1940 SwingSchedulerDAG *DAG) {
1941 BitVector
Added(SUnits.size());
1942 DenseMap<int, int> OutputDeps;
1943 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1946 for (
auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
1949 if (OE.isOutputDep()) {
1950 int N = OE.getDst()->NodeNum;
1952 auto Dep = OutputDeps.
find(BackEdge);
1953 if (Dep != OutputDeps.
end()) {
1954 BackEdge = Dep->second;
1955 OutputDeps.
erase(Dep);
1957 OutputDeps[
N] = BackEdge;
1960 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
1972 int N = OE.getDst()->NodeNum;
1974 AdjK[i].push_back(
N);
1980 for (
auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
1981 SUnit *Src =
IE.getSrc();
1982 SUnit *Dst =
IE.getDst();
1985 if (
IE.isOrderDep() && Src->getInstr()->mayLoad()) {
1986 int N = Src->NodeNum;
1988 AdjK[i].push_back(
N);
1995 for (
auto &OD : OutputDeps)
1996 if (!
Added.test(OD.second)) {
1997 AdjK[OD.first].push_back(OD.second);
1998 Added.set(OD.second);
2004bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
2005 const SwingSchedulerDAG *DAG,
2007 SUnit *SV = &SUnits[
V];
2012 for (
auto W : AdjK[V]) {
2013 if (NumPaths > MaxPaths)
2024 if (!Blocked.test(W)) {
2025 if (circuit(W, S, NodeSets, DAG,
2026 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
2034 for (
auto W : AdjK[V]) {
2045void SwingSchedulerDAG::Circuits::unblock(
int U) {
2047 SmallPtrSet<SUnit *, 4> &BU =
B[
U];
2048 while (!BU.
empty()) {
2049 SmallPtrSet<SUnit *, 4>::iterator
SI = BU.
begin();
2050 assert(SI != BU.
end() &&
"Invalid B set.");
2053 if (Blocked.test(
W->NodeNum))
2054 unblock(
W->NodeNum);
2060void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
2061 Circuits Cir(SUnits, Topo);
2063 Cir.createAdjacencyStructure(
this);
2064 for (
int I = 0,
E = SUnits.size();
I !=
E; ++
I) {
2066 Cir.circuit(
I,
I, NodeSets,
this);
2088void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
2089 for (SUnit &SU : DAG->
SUnits) {
2099 for (
auto &Dep : SU.
Preds) {
2100 SUnit *TmpSU = Dep.getSUnit();
2101 MachineInstr *TmpMI = TmpSU->
getInstr();
2112 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
2120 for (
auto &Dep : PHISUs[Index]->Succs) {
2124 SUnit *TmpSU = Dep.getSUnit();
2125 MachineInstr *TmpMI = TmpSU->
getInstr();
2134 if (UseSUs.
size() == 0)
2139 for (
auto *
I : UseSUs) {
2140 for (
auto *Src : SrcSUs) {
2156void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
2157 ScheduleInfo.resize(SUnits.size());
2160 for (
int I : Topo) {
2161 const SUnit &SU = SUnits[
I];
2168 for (
int I : Topo) {
2170 int zeroLatencyDepth = 0;
2171 SUnit *SU = &SUnits[
I];
2172 for (
const auto &IE : DDG->getInEdges(SU)) {
2173 SUnit *Pred =
IE.getSrc();
2174 if (
IE.getLatency() == 0)
2176 std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1);
2177 if (
IE.ignoreDependence(
true))
2179 asap = std::max(asap, (
int)(getASAP(Pred) +
IE.getLatency() -
2180 IE.getDistance() * MII));
2182 maxASAP = std::max(maxASAP, asap);
2183 ScheduleInfo[
I].ASAP = asap;
2184 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
2190 int zeroLatencyHeight = 0;
2191 SUnit *SU = &SUnits[
I];
2192 for (
const auto &OE : DDG->getOutEdges(SU)) {
2193 SUnit *Succ = OE.getDst();
2196 if (OE.getLatency() == 0)
2198 std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1);
2199 if (OE.ignoreDependence(
true))
2201 alap = std::min(alap, (
int)(getALAP(Succ) - OE.getLatency() +
2202 OE.getDistance() * MII));
2205 ScheduleInfo[
I].ALAP = alap;
2206 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
2210 for (NodeSet &
I : NodeSets)
2211 I.computeNodeSetInfo(
this);
2214 for (
unsigned i = 0; i < SUnits.size(); i++) {
2215 dbgs() <<
"\tNode " << i <<
":\n";
2216 dbgs() <<
"\t ASAP = " << getASAP(&SUnits[i]) <<
"\n";
2217 dbgs() <<
"\t ALAP = " << getALAP(&SUnits[i]) <<
"\n";
2218 dbgs() <<
"\t MOV = " << getMOV(&SUnits[i]) <<
"\n";
2219 dbgs() <<
"\t D = " << getDepth(&SUnits[i]) <<
"\n";
2220 dbgs() <<
"\t H = " << getHeight(&SUnits[i]) <<
"\n";
2221 dbgs() <<
"\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) <<
"\n";
2222 dbgs() <<
"\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) <<
"\n";
2237 SUnit *PredSU = IE.getSrc();
2238 if (S && S->count(PredSU) == 0)
2240 if (IE.ignoreDependence(
true))
2251 SUnit *SuccSU = OE.getDst();
2252 if (!OE.isAntiDep())
2254 if (S && S->count(SuccSU) == 0)
2260 return !Preds.
empty();
2273 SUnit *SuccSU = OE.getDst();
2274 if (S && S->count(SuccSU) == 0)
2276 if (OE.ignoreDependence(
false))
2287 SUnit *PredSU = IE.getSrc();
2288 if (!IE.isAntiDep())
2290 if (S && S->count(PredSU) == 0)
2296 return !Succs.
empty();
2312 if (!Visited.
insert(Cur).second)
2313 return Path.contains(Cur);
2314 bool FoundPath =
false;
2316 if (!OE.ignoreDependence(
false))
2318 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
2320 if (IE.isAntiDep() && IE.getDistance() == 0)
2322 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
2337 for (
SUnit *SU : NS) {
2343 if (
Reg.isVirtual())
2345 else if (
MRI.isAllocatable(
Reg))
2346 for (MCRegUnit Unit :
TRI->regunits(
Reg.asMCReg()))
2350 for (
SUnit *SU : NS)
2354 if (
Reg.isVirtual()) {
2358 }
else if (
MRI.isAllocatable(
Reg)) {
2359 for (MCRegUnit Unit :
TRI->regunits(
Reg.asMCReg()))
2370void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2371 for (
auto &NS : NodeSets) {
2375 IntervalPressure RecRegPressure;
2376 RegPressureTracker RecRPTracker(RecRegPressure);
2377 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(),
false,
true);
2379 RecRPTracker.closeBottom();
2381 std::vector<SUnit *> SUnits(NS.begin(), NS.end());
2382 llvm::sort(SUnits, [](
const SUnit *
A,
const SUnit *
B) {
2383 return A->NodeNum >
B->NodeNum;
2386 for (
auto &SU : SUnits) {
2392 RecRPTracker.setPos(std::next(CurInstI));
2394 RegPressureDelta RPDelta;
2396 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2401 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2404 NS.setExceedPressure(SU);
2407 RecRPTracker.recede();
2414void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2415 unsigned Colocate = 0;
2416 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2418 SmallSetVector<SUnit *, 8>
S1;
2421 for (
int j = i + 1;
j <
e; ++
j) {
2425 SmallSetVector<SUnit *, 8> S2;
2442void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2447 for (
auto &NS : NodeSets) {
2448 if (NS.getRecMII() > 2)
2450 if (NS.getMaxDepth() > MII)
2459void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2460 SetVector<SUnit *> NodesAdded;
2461 SmallPtrSet<SUnit *, 8> Visited;
2464 for (NodeSet &
I : NodeSets) {
2465 SmallSetVector<SUnit *, 8>
N;
2468 SetVector<SUnit *>
Path;
2469 for (SUnit *NI :
N) {
2471 computePath(NI, Path, NodesAdded,
I, Visited, DDG.get());
2478 if (
succ_L(NodesAdded,
N, DDG.get())) {
2479 SetVector<SUnit *>
Path;
2480 for (SUnit *NI :
N) {
2482 computePath(NI, Path,
I, NodesAdded, Visited, DDG.get());
2493 SmallSetVector<SUnit *, 8>
N;
2494 if (
succ_L(NodesAdded,
N, DDG.get()))
2496 addConnectedNodes(
I, NewSet, NodesAdded);
2497 if (!NewSet.
empty())
2498 NodeSets.push_back(NewSet);
2503 if (
pred_L(NodesAdded,
N, DDG.get()))
2505 addConnectedNodes(
I, NewSet, NodesAdded);
2506 if (!NewSet.
empty())
2507 NodeSets.push_back(NewSet);
2511 for (SUnit &SU : SUnits) {
2512 if (NodesAdded.
count(&SU) == 0) {
2514 addConnectedNodes(&SU, NewSet, NodesAdded);
2515 if (!NewSet.
empty())
2516 NodeSets.push_back(NewSet);
2522void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2523 SetVector<SUnit *> &NodesAdded) {
2526 for (
auto &OE : DDG->getOutEdges(SU)) {
2528 if (!OE.isArtificial() && !
Successor->isBoundaryNode() &&
2530 addConnectedNodes(
Successor, NewSet, NodesAdded);
2532 for (
auto &IE : DDG->getInEdges(SU)) {
2533 SUnit *Predecessor =
IE.getSrc();
2534 if (!
IE.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2535 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2544 for (
SUnit *SU : Set1) {
2545 if (Set2.
count(SU) != 0)
2548 return !Result.empty();
2552void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2553 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2556 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2561 for (SUnit *SU : *J)
2573void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2574 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2576 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2577 J->remove_if([&](SUnit *SUJ) {
return I->count(SUJ); });
2592void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2593 SmallSetVector<SUnit *, 8>
R;
2596 for (
auto &Nodes : NodeSets) {
2599 SmallSetVector<SUnit *, 8>
N;
2614 }
else if (NodeSets.size() == 1) {
2615 for (
const auto &
N : Nodes)
2616 if (
N->Succs.size() == 0)
2622 SUnit *maxASAP =
nullptr;
2623 for (SUnit *SU : Nodes) {
2624 if (maxASAP ==
nullptr || getASAP(SU) > getASAP(maxASAP) ||
2625 (getASAP(SU) == getASAP(maxASAP) && SU->
NodeNum > maxASAP->
NodeNum))
2633 while (!
R.empty()) {
2634 if (Order == TopDown) {
2638 while (!
R.empty()) {
2639 SUnit *maxHeight =
nullptr;
2640 for (SUnit *
I : R) {
2641 if (maxHeight ==
nullptr || getHeight(
I) > getHeight(maxHeight))
2643 else if (getHeight(
I) == getHeight(maxHeight) &&
2644 getZeroLatencyHeight(
I) > getZeroLatencyHeight(maxHeight))
2646 else if (getHeight(
I) == getHeight(maxHeight) &&
2647 getZeroLatencyHeight(
I) ==
2648 getZeroLatencyHeight(maxHeight) &&
2649 getMOV(
I) < getMOV(maxHeight))
2654 R.remove(maxHeight);
2655 for (
const auto &OE : DDG->getOutEdges(maxHeight)) {
2656 SUnit *SU = OE.getDst();
2657 if (Nodes.count(SU) == 0)
2661 if (OE.ignoreDependence(
false))
2670 for (
const auto &IE : DDG->getInEdges(maxHeight)) {
2671 SUnit *SU =
IE.getSrc();
2672 if (!
IE.isAntiDep())
2674 if (Nodes.count(SU) == 0)
2683 SmallSetVector<SUnit *, 8>
N;
2690 while (!
R.empty()) {
2691 SUnit *maxDepth =
nullptr;
2692 for (SUnit *
I : R) {
2693 if (maxDepth ==
nullptr || getDepth(
I) > getDepth(maxDepth))
2695 else if (getDepth(
I) == getDepth(maxDepth) &&
2696 getZeroLatencyDepth(
I) > getZeroLatencyDepth(maxDepth))
2698 else if (getDepth(
I) == getDepth(maxDepth) &&
2699 getZeroLatencyDepth(
I) == getZeroLatencyDepth(maxDepth) &&
2700 getMOV(
I) < getMOV(maxDepth))
2706 if (Nodes.isExceedSU(maxDepth)) {
2709 R.insert(Nodes.getNode(0));
2712 for (
const auto &IE : DDG->getInEdges(maxDepth)) {
2713 SUnit *SU =
IE.getSrc();
2714 if (Nodes.count(SU) == 0)
2725 for (
const auto &OE : DDG->getOutEdges(maxDepth)) {
2726 SUnit *SU = OE.getDst();
2727 if (!OE.isAntiDep())
2729 if (Nodes.count(SU) == 0)
2738 SmallSetVector<SUnit *, 8>
N;
2747 dbgs() <<
"Node order: ";
2749 dbgs() <<
" " <<
I->NodeNum <<
" ";
2756bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2763 bool scheduleFound =
false;
2764 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2767 std::make_unique<HighRegisterPressureDetector>(Loop.getHeader(), MF);
2768 HRPDetector->init(RegClassInfo);
2771 for (
unsigned II = MII;
II <= MAX_II && !scheduleFound; ++
II) {
2783 int EarlyStart = INT_MIN;
2784 int LateStart = INT_MAX;
2793 dbgs() <<
format(
"\tes: %8x ls: %8x\n", EarlyStart, LateStart));
2795 if (EarlyStart > LateStart)
2796 scheduleFound =
false;
2797 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
2799 Schedule.
insert(SU, EarlyStart, EarlyStart + (
int)
II - 1,
II);
2800 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
2802 Schedule.
insert(SU, LateStart, LateStart - (
int)
II + 1,
II);
2803 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2804 LateStart = std::min(LateStart, EarlyStart + (
int)
II - 1);
2813 scheduleFound = Schedule.
insert(SU, LateStart, EarlyStart,
II);
2815 scheduleFound = Schedule.
insert(SU, EarlyStart, LateStart,
II);
2818 scheduleFound = Schedule.
insert(SU, FirstCycle + getASAP(SU),
2819 FirstCycle + getASAP(SU) +
II - 1,
II);
2827 scheduleFound =
false;
2831 dbgs() <<
"\tCan't schedule\n";
2833 }
while (++NI != NE && scheduleFound);
2838 scheduleFound = DDG->isValidSchedule(Schedule);
2860 if (scheduleFound) {
2861 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*
this, Schedule);
2866 if (scheduleFound) {
2868 Pass.ORE->emit([&]() {
2869 return MachineOptimizationRemarkAnalysis(
2870 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
2871 <<
"Schedule found with Initiation Interval: "
2873 <<
", MaxStageCount: "
2887 if (!
Reg.isVirtual())
2889 if (
MRI.getVRegDef(
Reg)->getParent() !=
MI.getParent())
2901 if (!
Op.isReg() || !
Op.getReg().isVirtual())
2929 if (Def->getParent() != LoopBB)
2932 if (Def->isCopy()) {
2934 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
2936 CurReg = Def->getOperand(1).getReg();
2937 }
else if (Def->isPHI()) {
2943 }
else if (
TII->getIncrementValue(*Def,
Value)) {
2951 bool OffsetIsScalable;
2952 if (
TII->getMemOperandWithOffset(*Def, BaseOp,
Offset, OffsetIsScalable,
2955 CurReg = BaseOp->
getReg();
2967 if (CurReg == OrgReg)
2979bool SwingSchedulerDAG::computeDelta(
const MachineInstr &
MI,
int &Delta)
const {
2980 const TargetRegisterInfo *
TRI = MF.getSubtarget().getRegisterInfo();
2981 const MachineOperand *BaseOp;
2983 bool OffsetIsScalable;
2988 if (OffsetIsScalable)
2991 if (!BaseOp->
isReg())
3004bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *
MI,
3006 unsigned &OffsetPos,
3012 unsigned BasePosLd, OffsetPosLd;
3018 MachineRegisterInfo &
MRI =
MI->getMF()->getRegInfo();
3019 MachineInstr *
Phi =
MRI.getVRegDef(BaseReg);
3020 if (!Phi || !
Phi->isPHI())
3028 MachineInstr *PrevDef =
MRI.getVRegDef(PrevReg);
3029 if (!PrevDef || PrevDef ==
MI)
3035 unsigned BasePos1 = 0, OffsetPos1 = 0;
3041 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
3043 MachineInstr *NewMI = MF.CloneMachineInstr(
MI);
3046 MF.deleteMachineInstr(NewMI);
3051 BasePos = BasePosLd;
3052 OffsetPos = OffsetPosLd;
3064 InstrChanges.find(SU);
3065 if (It != InstrChanges.
end()) {
3066 std::pair<Register, int64_t> RegAndOffset = It->second;
3067 unsigned BasePos, OffsetPos;
3068 if (!
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
3070 Register BaseReg =
MI->getOperand(BasePos).getReg();
3076 if (BaseStageNum < DefStageNum) {
3078 int OffsetDiff = DefStageNum - BaseStageNum;
3079 if (DefCycleNum < BaseCycleNum) {
3085 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3100 while (Def->isPHI()) {
3101 if (!Visited.
insert(Def).second)
3103 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3104 if (Def->getOperand(i + 1).getMBB() == BB) {
3105 Def =
MRI.getVRegDef(Def->getOperand(i).getReg());
3116 int DeltaB, DeltaO, Delta;
3123 int64_t OffsetB, OffsetO;
3124 bool OffsetBIsScalable, OffsetOIsScalable;
3126 if (!
TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,
3127 OffsetBIsScalable,
TRI) ||
3128 !
TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,
3129 OffsetOIsScalable,
TRI))
3132 if (OffsetBIsScalable || OffsetOIsScalable)
3142 if (!RegB.
isVirtual() || !RegO.isVirtual())
3147 if (!DefB || !DefO || !DefB->
isPHI() || !DefO->
isPHI())
3172 dbgs() <<
"Overlap check:\n";
3173 dbgs() <<
" BaseMI: ";
3175 dbgs() <<
" Base + " << OffsetB <<
" + I * " << Delta
3176 <<
", Len: " << AccessSizeB.
getValue() <<
"\n";
3177 dbgs() <<
" OtherMI: ";
3179 dbgs() <<
" Base + " << OffsetO <<
" + I * " << Delta
3180 <<
", Len: " << AccessSizeO.
getValue() <<
"\n";
3188 int64_t BaseMinAddr = OffsetB;
3189 int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.
getValue() - 1;
3190 if (BaseMinAddr > OhterNextIterMaxAddr) {
3195 int64_t BaseMaxAddr = OffsetB + AccessSizeB.
getValue() - 1;
3196 int64_t OtherNextIterMinAddr = OffsetO + Delta;
3197 if (BaseMaxAddr < OtherNextIterMinAddr) {
3211 if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() ||
3212 Edge.getDst()->isBoundaryNode())
3218 if (Edge.isOutputDep())
3223 assert(
SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
3240void SwingSchedulerDAG::postProcessDAG() {
3241 for (
auto &M : Mutations)
3251 bool forward =
true;
3253 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
3254 << EndCycle <<
" II: " <<
II <<
"\n";
3256 if (StartCycle > EndCycle)
3260 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3261 for (
int curCycle = StartCycle; curCycle != termCycle;
3262 forward ? ++curCycle : --curCycle) {
3265 ProcItinResources.canReserveResources(*SU, curCycle)) {
3267 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
3272 ProcItinResources.reserveResources(*SU, curCycle);
3273 ScheduledInstrs[curCycle].push_back(SU);
3274 InstrToCycle.insert(std::make_pair(SU, curCycle));
3275 if (curCycle > LastCycle)
3276 LastCycle = curCycle;
3277 if (curCycle < FirstCycle)
3278 FirstCycle = curCycle;
3282 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
3295 int EarlyCycle = INT_MAX;
3296 while (!Worklist.
empty()) {
3299 if (Visited.
count(PrevSU))
3301 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3302 if (it == InstrToCycle.end())
3304 EarlyCycle = std::min(EarlyCycle, it->second);
3305 for (
const auto &IE : DDG->
getInEdges(PrevSU))
3306 if (IE.isOrderDep() || IE.isOutputDep())
3319 int LateCycle = INT_MIN;
3320 while (!Worklist.
empty()) {
3325 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3326 if (it == InstrToCycle.end())
3328 LateCycle = std::max(LateCycle, it->second);
3330 if (OE.isOrderDep() || OE.isOutputDep())
3341 for (
auto &
P : SU->
Preds)
3342 if (
P.getKind() ==
SDep::Anti &&
P.getSUnit()->getInstr()->isPHI())
3343 for (
auto &S :
P.getSUnit()->Succs)
3344 if (S.getKind() ==
SDep::Data && S.getSUnit()->getInstr()->isPHI())
3345 return P.getSUnit();
3358 for (
int cycle =
getFirstCycle(); cycle <= LastCycle; ++cycle) {
3361 if (IE.getSrc() ==
I) {
3366 *MinLateStart = std::min(*MinLateStart, End);
3368 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() *
II;
3369 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3374 if (OE.getDst() ==
I) {
3379 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
3381 int LateStart = cycle - OE.getLatency() + OE.getDistance() *
II;
3382 *MinLateStart = std::min(*MinLateStart, LateStart);
3387 for (
const auto &Dep : SU->
Preds) {
3390 if (BE && Dep.getSUnit() == BE && !SU->
getInstr()->
isPHI() &&
3392 *MinLateStart = std::min(*MinLateStart, cycle);
3402 std::deque<SUnit *> &Insts)
const {
3404 bool OrderBeforeUse =
false;
3405 bool OrderAfterDef =
false;
3406 bool OrderBeforeDef =
false;
3407 unsigned MoveDef = 0;
3408 unsigned MoveUse = 0;
3413 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
3416 if (!MO.isReg() || !MO.getReg().isVirtual())
3420 unsigned BasePos, OffsetPos;
3421 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
3422 if (
MI->getOperand(BasePos).getReg() == Reg)
3426 std::tie(Reads, Writes) =
3427 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3429 OrderBeforeUse =
true;
3434 OrderAfterDef =
true;
3436 }
else if (MO.isUse() && Writes &&
stageScheduled(*
I) == StageInst1) {
3438 OrderBeforeUse =
true;
3442 OrderAfterDef =
true;
3446 OrderBeforeUse =
true;
3450 OrderAfterDef =
true;
3455 OrderBeforeUse =
true;
3461 OrderBeforeDef =
true;
3469 if (OE.getDst() != *
I)
3472 OrderBeforeUse =
true;
3479 else if ((OE.isAntiDep() || OE.isOutputDep()) &&
3481 OrderBeforeUse =
true;
3482 if ((MoveUse == 0) || (Pos < MoveUse))
3487 if (IE.getSrc() != *
I)
3489 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
3491 OrderAfterDef =
true;
3498 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3499 OrderBeforeUse =
false;
3504 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3508 if (OrderBeforeUse && OrderAfterDef) {
3509 SUnit *UseSU = Insts.at(MoveUse);
3510 SUnit *DefSU = Insts.at(MoveDef);
3511 if (MoveUse > MoveDef) {
3512 Insts.erase(Insts.begin() + MoveUse);
3513 Insts.erase(Insts.begin() + MoveDef);
3515 Insts.erase(Insts.begin() + MoveDef);
3516 Insts.erase(Insts.begin() + MoveUse);
3526 Insts.push_front(SU);
3528 Insts.push_back(SU);
3536 assert(Phi.isPHI() &&
"Expecting a Phi.");
3543 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3551 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3569 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3575 if (DMO.getReg() == LoopReg)
3586 if (InstrToCycle.count(IE.getSrc()))
3597 for (
auto &SU : SSD->
SUnits)
3602 while (!Worklist.
empty()) {
3604 if (DoNotPipeline.
count(SU))
3607 DoNotPipeline.
insert(SU);
3614 if (OE.getDistance() == 1)
3617 return DoNotPipeline;
3626 int NewLastCycle = INT_MIN;
3631 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3638 if (IE.getDistance() == 0)
3639 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
3644 if (OE.getDistance() == 1)
3645 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);
3647 int OldCycle = InstrToCycle[&SU];
3648 if (OldCycle != NewCycle) {
3649 InstrToCycle[&SU] = NewCycle;
3654 <<
") is not pipelined; moving from cycle " << OldCycle
3655 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3680 if (FirstCycle + InitiationInterval <= NewCycle)
3683 NewLastCycle = std::max(NewLastCycle, NewCycle);
3685 LastCycle = NewLastCycle;
3702 int CycleDef = InstrToCycle[&SU];
3703 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3705 SUnit *Dst = OE.getDst();
3706 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
3707 if (OE.getReg().isPhysical()) {
3710 if (InstrToCycle[Dst] <= CycleDef)
3728void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3731 typedef std::pair<SUnit *, unsigned> UnitIndex;
3732 std::vector<UnitIndex> Indices(
NodeOrder.size(), std::make_pair(
nullptr, 0));
3734 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i)
3735 Indices.push_back(std::make_pair(
NodeOrder[i], i));
3737 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3738 return std::get<0>(i1) < std::get<0>(i2);
3751 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i) {
3755 bool PredBefore =
false;
3756 bool SuccBefore =
false;
3763 for (
const auto &IE : DDG->getInEdges(SU)) {
3764 SUnit *PredSU = IE.getSrc();
3765 unsigned PredIndex = std::get<1>(
3774 for (
const auto &OE : DDG->getOutEdges(SU)) {
3775 SUnit *SuccSU = OE.getDst();
3781 unsigned SuccIndex = std::get<1>(
3794 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3799 NumNodeOrderIssues++;
3803 <<
" are scheduled before node " << SU->
NodeNum
3810 dbgs() <<
"Invalid node order found!\n";
3823 for (
SUnit *SU : Instrs) {
3825 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3833 InstrChanges.find(SU);
3834 if (It != InstrChanges.
end()) {
3835 unsigned BasePos, OffsetPos;
3837 if (
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos)) {
3841 MI->getOperand(OffsetPos).getImm() - It->second.second;
3854 unsigned TiedUseIdx = 0;
3855 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3857 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3859 NewBaseReg =
MI->getOperand(i).getReg();
3868 const std::deque<SUnit *> &Instrs)
const {
3869 std::deque<SUnit *> NewOrderPhi;
3870 for (
SUnit *SU : Instrs) {
3872 NewOrderPhi.push_back(SU);
3874 std::deque<SUnit *> NewOrderI;
3875 for (
SUnit *SU : Instrs) {
3891 std::deque<SUnit *> &cycleInstrs =
3892 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3894 ScheduledInstrs[cycle].push_front(SU);
3900 for (
int cycle =
getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3901 ScheduledInstrs.erase(cycle);
3911 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3920 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3921 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3922 for (
const auto &
I : Nodes)
3923 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3927#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3934 for (
SUnit *CI : cycleInstrs->second) {
3936 os <<
"(" << CI->
NodeNum <<
") ";
3947void ResourceManager::dumpMRT()
const {
3951 std::stringstream SS;
3953 SS << std::setw(4) <<
"Slot";
3954 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3955 SS << std::setw(3) <<
I;
3956 SS << std::setw(7) <<
"#Mops"
3958 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3959 SS << std::setw(4) << Slot;
3960 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3961 SS << std::setw(3) << MRT[Slot][
I];
3962 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3971 unsigned ProcResourceID = 0;
3975 assert(SM.getNumProcResourceKinds() < 64 &&
3976 "Too many kinds of resources, unsupported");
3979 Masks.
resize(SM.getNumProcResourceKinds());
3980 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3982 if (
Desc.SubUnitsIdxBegin)
3984 Masks[
I] = 1ULL << ProcResourceID;
3988 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3990 if (!
Desc.SubUnitsIdxBegin)
3992 Masks[
I] = 1ULL << ProcResourceID;
3993 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3994 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3999 dbgs() <<
"ProcResourceDesc:\n";
4000 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
4002 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
4003 ProcResource->
Name,
I, Masks[
I],
4006 dbgs() <<
" -----------------\n";
4014 dbgs() <<
"canReserveResources:\n";
4017 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
4023 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
4029 reserveResources(SCDesc,
Cycle);
4030 bool Result = !isOverbooked();
4031 unreserveResources(SCDesc,
Cycle);
4040 dbgs() <<
"reserveResources:\n";
4043 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
4049 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
4055 reserveResources(SCDesc,
Cycle);
4060 dbgs() <<
"reserveResources: done!\n\n";
4071 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
4074 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
4083 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
4086 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
4089bool ResourceManager::isOverbooked()
const {
4091 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
4092 for (
unsigned I = 1,
E = SM.getNumProcResourceKinds();
I <
E; ++
I) {
4093 const MCProcResourceDesc *
Desc = SM.getProcResource(
I);
4094 if (MRT[Slot][
I] >
Desc->NumUnits)
4097 if (NumScheduledMops[Slot] > IssueWidth)
4103int ResourceManager::calculateResMIIDFA()
const {
4108 FuncUnitSorter FUS = FuncUnitSorter(*ST);
4109 for (SUnit &SU : DAG->
SUnits)
4110 FUS.calcCriticalResources(*SU.
getInstr());
4111 PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
4114 for (SUnit &SU : DAG->
SUnits)
4121 while (!FuncUnitOrder.empty()) {
4122 MachineInstr *
MI = FuncUnitOrder.top();
4123 FuncUnitOrder.pop();
4130 unsigned ReservedCycles = 0;
4131 auto *RI = Resources.
begin();
4132 auto *RE = Resources.
end();
4134 dbgs() <<
"Trying to reserve resource for " << NumCycles
4135 <<
" cycles for \n";
4138 for (
unsigned C = 0;
C < NumCycles; ++
C)
4140 if ((*RI)->canReserveResources(*
MI)) {
4141 (*RI)->reserveResources(*
MI);
4148 <<
", NumCycles:" << NumCycles <<
"\n");
4150 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
4152 <<
"NewResource created to reserve resources"
4155 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
4156 NewResource->reserveResources(*
MI);
4157 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
4161 int Resmii = Resources.
size();
4168 return calculateResMIIDFA();
4175 for (
SUnit &SU : DAG->SUnits) {
4187 <<
" WriteProcRes: ";
4192 make_range(STI->getWriteProcResBegin(SCDesc),
4193 STI->getWriteProcResEnd(SCDesc))) {
4197 SM.getProcResource(PRE.ProcResourceIdx);
4198 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
4201 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
4206 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
4209 dbgs() <<
"#Mops: " << NumMops <<
", "
4210 <<
"IssueWidth: " << IssueWidth <<
", "
4211 <<
"Cycles: " << Result <<
"\n";
4216 std::stringstream SS;
4217 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
4218 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
4223 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
4225 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
4228 std::stringstream SS;
4229 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
4230 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
4231 << std::setw(10) << Cycles <<
"\n";
4235 if (Cycles > Result)
4242 InitiationInterval =
II;
4243 DFAResources.clear();
4244 DFAResources.resize(
II);
4245 for (
auto &
I : DFAResources)
4246 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
4249 NumScheduledMops.clear();
4250 NumScheduledMops.resize(
II);
4254 if (Pred.isArtificial() || Dst->isBoundaryNode())
4259 return IgnoreAnti && (Pred.getKind() ==
SDep::Kind::Anti || Distance != 0);
4262SwingSchedulerDDG::SwingSchedulerDDGEdges &
4263SwingSchedulerDDG::getEdges(
const SUnit *SU) {
4265 return EntrySUEdges;
4271const SwingSchedulerDDG::SwingSchedulerDDGEdges &
4272SwingSchedulerDDG::getEdges(
const SUnit *SU)
const {
4274 return EntrySUEdges;
4280void SwingSchedulerDDG::addEdge(
const SUnit *SU,
4281 const SwingSchedulerDDGEdge &
Edge) {
4283 "Validation-only edges are not expected here.");
4284 auto &Edges = getEdges(SU);
4285 if (
Edge.getSrc() == SU)
4286 Edges.Succs.push_back(
Edge);
4288 Edges.Preds.push_back(
Edge);
4291void SwingSchedulerDDG::initEdges(SUnit *SU) {
4292 for (
const auto &PI : SU->
Preds) {
4293 SwingSchedulerDDGEdge
Edge(SU, PI,
false,
4298 for (
const auto &SI : SU->
Succs) {
4299 SwingSchedulerDDGEdge
Edge(SU, SI,
true,
4307 : EntrySU(EntrySU), ExitSU(ExitSU) {
4308 EdgesVec.resize(SUnits.size());
4313 for (
auto &SU : SUnits)
4317 for (
SUnit &SU : SUnits) {
4322 for (
SUnit *Dst : *OD) {
4325 Edge.setDistance(1);
4326 ValidationOnlyEdges.push_back(Edge);
4332const SwingSchedulerDDG::EdgesType &
4334 return getEdges(SU).Preds;
4337const SwingSchedulerDDG::EdgesType &
4339 return getEdges(SU).Succs;
4346 auto ExpandCycle = [&](
SUnit *SU) {
4353 SUnit *Src = Edge.getSrc();
4354 SUnit *Dst = Edge.getDst();
4355 if (!Src->isInstr() || !Dst->isInstr())
4357 int CycleSrc = ExpandCycle(Src);
4358 int CycleDst = ExpandCycle(Dst);
4359 int MaxLateStart = CycleDst + Edge.getDistance() *
II - Edge.getLatency();
4360 if (CycleSrc > MaxLateStart) {
4362 dbgs() <<
"Validation failed for edge from " << Src->NodeNum <<
" to "
4363 << Dst->NodeNum <<
"\n";
4373 for (
SUnit &SU : SUnits) {
4402 !
TII->isGlobalMemoryObject(FromMI) &&
4420 const auto DumpSU = [](
const SUnit *SU) {
4421 std::ostringstream OSS;
4422 OSS <<
"SU(" << SU->
NodeNum <<
")";
4426 dbgs() <<
" Loop carried edges from " << DumpSU(SU) <<
"\n"
4428 for (
SUnit *Dst : *Order)
4429 dbgs() <<
" " << DumpSU(Dst) <<
"\n";
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::optional< unsigned > getTag(const TargetRegisterInfo *TRI, const MachineInstr &MI, const LoadInfo &LI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
A common definition of LaneBitmask for use in TableGen and CodeGen.
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
print mir2vec MIR2Vec Vocabulary Printer Pass
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static bool hasPHICycleDFS(unsigned Reg, const DenseMap< unsigned, SmallVector< unsigned, 2 > > &PhiDeps, SmallSet< unsigned, 8 > &Visited, SmallSet< unsigned, 8 > &RecStack)
Depth-first search to detect cycles among PHI dependencies.
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static bool findLoopIncrementValue(const MachineOperand &Op, int &Value)
When Op is a value that is incremented recursively in a loop and there is a unique instruction that i...
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static cl::opt< unsigned > SwpMaxNumStores("pipeliner-max-num-stores", cl::desc("Maximum number of stores allwed in the target loop."), cl::Hidden, cl::init(200))
A command line argument to limit the number of store instructions in the target basic block.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static bool hasPHICycle(const MachineBasicBlock *LoopHeader, const MachineRegisterInfo &MRI)
static cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, BatchAAResults &BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const SwingSchedulerDAG *SSD, bool PerformCheapCheck)
Returns true if there is a loop-carried order dependency from Src to Dst.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
Promote Memory to Register
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
Remove Loads Into Fake Uses
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
Add loop-carried chain dependencies.
void computeDependencies()
The main function to compute loop-carried order-dependencies.
const BitVector & getLoopCarried(unsigned Idx) const
LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, BatchAAResults *BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
AttributeList getAttributes() const
Return the attribute list for this Function.
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
Represents a single loop in the control flow graph.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PointerIntPair - This class implements a pair of a pointer and small integer.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind
These are the different kinds of scheduling dependencies.
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SmallPtrSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void dump() const override
LLVM_ABI void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
LLVM_ABI bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
typename vector_type::const_iterator iterator
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
void clear()
Completely clear the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
const EdgesType & getInEdges(const SUnit *SU) const
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
TargetInstrInfo - Interface to description of machine instruction set.
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool enableMachinePipeliner() const
True if the subtarget should run MachinePipeliner.
virtual bool useDFAforSMS() const
Default to DFA for resource management, return false when target will use ProcResource in InstrSchedM...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Wrapper class representing a virtual register or register unit.
constexpr bool isVirtualReg() const
constexpr MCRegUnit asMCRegUnit() const
constexpr Register asVirtualReg() const
The main class in the implementation of the target independent window scheduler.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
@ Valid
The data is already valid.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< DefNode * > Def
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
std::set< NodeId > NodeSet
friend class Instruction
Iterator for Instructions in a `BasicBlock.
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
@ WS_Force
Use window algorithm after SMS algorithm fails.
@ WS_On
Turn off window algorithm.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
cl::opt< bool > SwpEnableCopyToPhi
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
LLVM_ABI char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
@ Increment
Incrementally increasing token ID.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This class holds an SUnit corresponding to a memory operation and other information related to the in...
const Value * MemOpValue
The value of a memory operand.
SmallVector< const Value *, 2 > UnderlyingObjs
bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const
int64_t MemOpOffset
The offset of a memory operand.
bool IsAllIdentified
True if all the underlying objects are identified.
SUnitWithMemInfo(SUnit *SU)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
uint64_t FuncUnits
Bitmask representing a set of functional units.
static constexpr LaneBitmask getNone()
Represents loop-carried dependencies.
SmallSetVector< SUnit *, 8 > OrderDep
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.