72#include "llvm/Config/llvm-config.h"
100#define DEBUG_TYPE "pipeliner"
102STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
103STATISTIC(NumPipelined,
"Number of loops software pipelined");
104STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
105STATISTIC(NumFailBranch,
"Pipeliner abort due to unknown branch");
106STATISTIC(NumFailLoop,
"Pipeliner abort due to unsupported loop");
107STATISTIC(NumFailPreheader,
"Pipeliner abort due to missing preheader");
108STATISTIC(NumFailLargeMaxMII,
"Pipeliner abort due to MaxMII too large");
109STATISTIC(NumFailZeroMII,
"Pipeliner abort due to zero MII");
110STATISTIC(NumFailNoSchedule,
"Pipeliner abort due to no schedule found");
111STATISTIC(NumFailZeroStage,
"Pipeliner abort due to zero stage");
112STATISTIC(NumFailLargeMaxStage,
"Pipeliner abort due to too many stages");
113STATISTIC(NumFailTooManyStores,
"Pipeliner abort due to too many stores");
117 cl::desc(
"Enable Software Pipelining"));
126 cl::desc(
"Size limit for the MII."),
132 cl::desc(
"Force pipeliner to use specified II."),
138 cl::desc(
"Maximum stages allowed in the generated scheduled."),
145 cl::desc(
"Prune dependences between unrelated Phi nodes."),
152 cl::desc(
"Prune loop carried order dependences."),
170 cl::desc(
"Instead of emitting the pipelined code, annotate instructions "
171 "with the generated schedule for feeding into the "
172 "-modulo-schedule-test pass"));
177 "Use the experimental peeling code generator for software pipelining"));
185 cl::desc(
"Limit register pressure of scheduled loop"));
190 cl::desc(
"Margin representing the unused percentage of "
191 "the register pressure limit"));
195 cl::desc(
"Use the MVE code generator for software pipelining"));
200 "pipeliner-max-num-stores",
208 cl::desc(
"Enable CopyToPhi DAG Mutation"));
213 "pipeliner-force-issue-width",
220 cl::desc(
"Set how to use window scheduling algorithm."),
222 "Turn off window algorithm."),
224 "Use window algorithm after SMS algorithm fails."),
226 "Use window algorithm instead of SMS algorithm.")));
228unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
236 "Modulo Software Pipelining",
false,
false)
278 enum class InstrTag {
287 TaggedSUnit(
SUnit *SU, InstrTag Tag)
290 InstrTag
getTag()
const {
return InstrTag(getInt()); }
294 struct LoadStoreChunk {
298 void append(
SUnit *SU);
303 std::vector<SUnit> &SUnits;
309 std::vector<BitVector> LoopCarried;
322 std::vector<TaggedSUnit> TaggedSUnits;
336 return LoopCarried[Idx];
341 std::optional<InstrTag> getInstrTag(
SUnit *SU)
const;
343 void addLoopCarriedDepenenciesForChunks(
const LoadStoreChunk &From,
344 const LoadStoreChunk &To);
355 bool PerformCheapCheck =
false);
357 void computeDependenciesAux();
388 TII =
MF->getSubtarget().getInstrInfo();
391 for (
const auto &L : *
MLI)
401bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
403 for (
const auto &InnerLoop : L)
404 Changed |= scheduleLoop(*InnerLoop);
416 setPragmaPipelineOptions(L);
417 if (!canPipelineLoop(L)) {
421 L.getStartLoc(), L.getHeader())
422 <<
"Failed to pipeline loop";
425 LI.LoopPipelinerInfo.reset();
430 if (useSwingModuloScheduler())
431 Changed = swingModuloScheduler(L);
433 if (useWindowScheduler(
Changed))
434 Changed = runWindowScheduler(L);
436 LI.LoopPipelinerInfo.reset();
440void MachinePipeliner::setPragmaPipelineOptions(
MachineLoop &L) {
445 MachineBasicBlock *LBLK =
L.getTopBlock();
458 MDNode *LoopID = TI->
getMetadata(LLVMContext::MD_loop);
459 if (LoopID ==
nullptr)
476 if (S->
getString() ==
"llvm.loop.pipeline.initiationinterval") {
478 "Pipeline initiation interval hint metadata should have two operands.");
482 }
else if (S->
getString() ==
"llvm.loop.pipeline.disable") {
491bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
492 if (
L.getNumBlocks() != 1) {
494 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
495 L.getStartLoc(),
L.getHeader())
496 <<
"Not a single basic block: "
497 <<
ore::NV(
"NumBlocks",
L.getNumBlocks());
504 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
505 L.getStartLoc(),
L.getHeader())
506 <<
"Disabled by Pragma.";
516 if (
TII->analyzeBranch(*
L.getHeader(),
LI.TBB,
LI.FBB,
LI.BrCond)) {
517 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeBranch, can NOT pipeline Loop\n");
520 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
521 L.getStartLoc(),
L.getHeader())
522 <<
"The branch can't be understood";
527 LI.LoopInductionVar =
nullptr;
528 LI.LoopCompare =
nullptr;
529 LI.LoopPipelinerInfo =
TII->analyzeLoopForPipelining(
L.getTopBlock());
530 if (!
LI.LoopPipelinerInfo) {
531 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeLoop, can NOT pipeline Loop\n");
534 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
535 L.getStartLoc(),
L.getHeader())
536 <<
"The loop structure is not supported";
541 if (!
L.getLoopPreheader()) {
542 LLVM_DEBUG(
dbgs() <<
"Preheader not found, can NOT pipeline Loop\n");
545 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
546 L.getStartLoc(),
L.getHeader())
547 <<
"No loop preheader found";
552 unsigned NumStores = 0;
553 for (MachineInstr &
MI : *
L.getHeader())
558 NumFailTooManyStores++;
560 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
561 L.getStartLoc(),
L.getHeader())
562 <<
"Too many store instructions in the loop: "
563 <<
ore::NV(
"NumStores", NumStores) <<
" > "
570 preprocessPhiNodes(*
L.getHeader());
575 MachineRegisterInfo &
MRI =
MF->getRegInfo();
579 for (MachineInstr &PI :
B.phis()) {
580 MachineOperand &DefOp = PI.getOperand(0);
582 auto *RC =
MRI.getRegClass(DefOp.
getReg());
584 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
585 MachineOperand &RegOp = PI.getOperand(i);
592 MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
609bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
610 assert(
L.getBlocks().size() == 1 &&
"SMS works on single blocks only.");
613 SwingSchedulerDAG SMS(
617 MachineBasicBlock *
MBB =
L.getHeader();
635 return SMS.hasNewSchedule();
649bool MachinePipeliner::runWindowScheduler(
MachineLoop &L) {
657 Context.RegClassInfo->runOnMachineFunction(*
MF);
662bool MachinePipeliner::useSwingModuloScheduler() {
667bool MachinePipeliner::useWindowScheduler(
bool Changed) {
674 "llvm.loop.pipeline.initiationinterval is set.\n");
682void SwingSchedulerDAG::setMII(
unsigned ResMII,
unsigned RecMII) {
685 else if (II_setByPragma > 0)
686 MII = II_setByPragma;
688 MII = std::max(ResMII, RecMII);
691void SwingSchedulerDAG::setMAX_II() {
694 else if (II_setByPragma > 0)
695 MAX_II = II_setByPragma;
705 updatePhiDependences();
706 Topo.InitDAGTopologicalSorting();
712 dbgs() <<
"===== Loop Carried Edges Begin =====\n";
715 dbgs() <<
"===== Loop Carried Edges End =====\n";
718 NodeSetType NodeSets;
719 findCircuits(NodeSets);
720 NodeSetType Circuits = NodeSets;
723 unsigned ResMII = calculateResMII();
724 unsigned RecMII = calculateRecMII(NodeSets);
732 setMII(ResMII, RecMII);
736 <<
" (rec=" << RecMII <<
", res=" << ResMII <<
")\n");
742 Pass.ORE->emit([&]() {
744 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
745 <<
"Invalid Minimal Initiation Interval: 0";
753 <<
", we don't pipeline large loops\n");
754 NumFailLargeMaxMII++;
755 Pass.ORE->emit([&]() {
757 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
758 <<
"Minimal Initiation Interval too large: "
759 <<
ore::NV(
"MII", (
int)MII) <<
" > "
761 <<
"Refer to -pipeliner-max-mii.";
766 computeNodeFunctions(NodeSets);
768 registerPressureFilter(NodeSets);
770 colocateNodeSets(NodeSets);
772 checkNodeSets(NodeSets);
775 for (
auto &
I : NodeSets) {
776 dbgs() <<
" Rec NodeSet ";
783 groupRemainingNodes(NodeSets);
785 removeDuplicateNodes(NodeSets);
788 for (
auto &
I : NodeSets) {
789 dbgs() <<
" NodeSet ";
794 computeNodeOrder(NodeSets);
797 checkValidNodeOrder(Circuits);
800 Scheduled = schedulePipeline(Schedule);
805 Pass.ORE->emit([&]() {
807 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
808 <<
"Unable to find schedule";
815 if (numStages == 0) {
818 Pass.ORE->emit([&]() {
820 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
821 <<
"No need to pipeline - no overlapped iterations in schedule.";
828 <<
" : too many stages, abort\n");
829 NumFailLargeMaxStage++;
830 Pass.ORE->emit([&]() {
832 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
833 <<
"Too many stages in schedule: "
834 <<
ore::NV(
"numStages", (
int)numStages) <<
" > "
836 <<
". Refer to -pipeliner-max-stages.";
841 Pass.ORE->emit([&]() {
844 <<
"Pipelined succesfully!";
849 std::vector<MachineInstr *> OrderedInsts;
853 OrderedInsts.push_back(SU->getInstr());
854 Cycles[SU->getInstr()] =
Cycle;
859 for (
auto &KV : NewMIs) {
860 Cycles[KV.first] = Cycles[KV.second];
861 Stages[KV.first] = Stages[KV.second];
862 NewInstrChanges[KV.first] = InstrChanges[
getSUnit(KV.first)];
869 "Cannot serialize a schedule with InstrChanges!");
879 LoopPipelinerInfo->isMVEExpanderSupported() &&
893 for (
auto &KV : NewMIs)
894 MF.deleteMachineInstr(KV.second);
905 assert(Phi.isPHI() &&
"Expecting a Phi.");
909 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
910 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
911 InitVal = Phi.getOperand(i).getReg();
913 LoopVal = Phi.getOperand(i).getReg();
915 assert(InitVal && LoopVal &&
"Unexpected Phi structure.");
921 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
922 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
923 return Phi.getOperand(i).getReg();
932 while (!Worklist.
empty()) {
934 for (
const auto &
SI : SU->
Succs) {
937 if (Visited.
count(SuccSU))
950 if (!getUnderlyingObjects())
975bool SUnitWithMemInfo::getUnderlyingObjects() {
977 if (!
MI->hasOneMemOperand())
999 if (Src.isTriviallyDisjoint(Dst))
1006 if (PerformCheapCheck) {
1013 int64_t Offset1, Offset2;
1014 bool Offset1IsScalable, Offset2IsScalable;
1015 if (
TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable,
1017 TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable,
1020 Offset1IsScalable == Offset2IsScalable &&
1021 (
int)Offset1 < (
int)Offset2) {
1022 assert(
TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) &&
1023 "What happened to the chain edge?");
1035 if (Src.isUnknown() || Dst.isUnknown())
1037 if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset)
1048 for (
const Value *SrcObj : Src.UnderlyingObjs)
1049 for (
const Value *DstObj : Dst.UnderlyingObjs)
1057void LoopCarriedOrderDepsTracker::LoadStoreChunk::append(SUnit *SU) {
1059 if (!
MI->mayLoadOrStore())
1061 (
MI->mayStore() ? Stores : Loads).emplace_back(SU);
1067 : DAG(SSD), BAA(BAA), SUnits(DAG->SUnits), N(SUnits.
size()),
1068 LoopCarried(N,
BitVector(N)), TII(TII), TRI(TRI) {}
1072 for (
auto &SU : SUnits) {
1073 auto Tagged = getInstrTag(&SU);
1078 TaggedSUnits.emplace_back(&SU, *Tagged);
1081 computeDependenciesAux();
1084std::optional<LoopCarriedOrderDepsTracker::InstrTag>
1085LoopCarriedOrderDepsTracker::getInstrTag(
SUnit *SU)
const {
1087 if (
TII->isGlobalMemoryObject(
MI))
1088 return InstrTag::Barrier;
1090 if (
MI->mayStore() ||
1091 (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad()))
1092 return InstrTag::LoadOrStore;
1094 if (
MI->mayRaiseFPException())
1095 return InstrTag::FPExceptions;
1097 return std::nullopt;
1100void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
1101 const SUnitWithMemInfo &Src,
const SUnitWithMemInfo &Dst,
1102 bool PerformCheapCheck) {
1104 if (Src.SU == Dst.SU)
1108 LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
1111void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
1112 const LoadStoreChunk &From,
const LoadStoreChunk &To) {
1114 for (
const SUnitWithMemInfo &Src : From.Loads)
1115 for (
const SUnitWithMemInfo &Dst : To.Stores)
1117 addDependenciesBetweenSUs(Src, Dst, Src.SU->NodeNum < Dst.SU->NodeNum);
1120 for (
const SUnitWithMemInfo &Src : From.Stores)
1121 for (
const SUnitWithMemInfo &Dst : To.Loads)
1122 addDependenciesBetweenSUs(Src, Dst);
1125 for (
const SUnitWithMemInfo &Src : From.Stores)
1126 for (
const SUnitWithMemInfo &Dst : To.Stores)
1127 addDependenciesBetweenSUs(Src, Dst);
1130void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
1132 for (
const auto &TSU : TaggedSUnits) {
1133 InstrTag
Tag = TSU.getTag();
1134 SUnit *SU = TSU.getPointer();
1136 case InstrTag::Barrier:
1137 Chunks.emplace_back();
1139 case InstrTag::LoadOrStore:
1140 Chunks.back().append(SU);
1142 case InstrTag::FPExceptions:
1151 for (
const LoadStoreChunk &Chunk : Chunks)
1152 addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
1164LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() {
1165 LoopCarriedEdges LCE;
1169 LCODTracker.computeDependencies();
1170 for (
unsigned I = 0;
I != SUnits.size();
I++)
1171 for (
const int Succ : LCODTracker.getLoopCarried(
I).set_bits())
1184void SwingSchedulerDAG::updatePhiDependences() {
1186 const TargetSubtargetInfo &
ST = MF.getSubtarget<TargetSubtargetInfo>();
1189 for (SUnit &
I : SUnits) {
1194 MachineInstr *
MI =
I.getInstr();
1196 for (
const MachineOperand &MO :
MI->operands()) {
1203 UI =
MRI.use_instr_begin(
Reg),
1204 UE =
MRI.use_instr_end();
1206 MachineInstr *
UseMI = &*UI;
1207 SUnit *SU = getSUnit(
UseMI);
1217 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
1222 }
else if (MO.isUse()) {
1225 if (
DefMI ==
nullptr)
1227 SUnit *SU = getSUnit(
DefMI);
1232 ST.adjustSchedDependency(SU, 0, &
I, MO.getOperandNo(), Dep,
1239 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
1248 for (
auto &PI :
I.Preds) {
1249 MachineInstr *PMI = PI.getSUnit()->getInstr();
1251 if (
I.getInstr()->isPHI()) {
1260 for (
const SDep &
D : RemoveDeps)
1267void SwingSchedulerDAG::changeDependences() {
1271 for (SUnit &
I : SUnits) {
1272 unsigned BasePos = 0, OffsetPos = 0;
1274 int64_t NewOffset = 0;
1275 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
1280 Register OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
1281 MachineInstr *
DefMI =
MRI.getUniqueVRegDef(OrigBase);
1284 SUnit *DefSU = getSUnit(
DefMI);
1288 MachineInstr *LastMI =
MRI.getUniqueVRegDef(NewBase);
1291 SUnit *LastSU = getSUnit(LastMI);
1295 if (Topo.IsReachable(&
I, LastSU))
1300 for (
const SDep &
P :
I.Preds)
1301 if (
P.getSUnit() == DefSU)
1303 for (
const SDep &
D : Deps) {
1304 Topo.RemovePred(&
I,
D.getSUnit());
1309 for (
auto &
P : LastSU->
Preds)
1312 for (
const SDep &
D : Deps) {
1313 Topo.RemovePred(LastSU,
D.getSUnit());
1320 Topo.AddPred(LastSU, &
I);
1325 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
1336 std::vector<MachineInstr *> &OrderedInsts,
1344 Stage <= LastStage; ++Stage) {
1347 Instrs[
Cycle].push_front(SU);
1354 std::deque<SUnit *> &CycleInstrs = Instrs[
Cycle];
1356 for (
SUnit *SU : CycleInstrs) {
1358 OrderedInsts.push_back(
MI);
1368struct FuncUnitSorter {
1369 const InstrItineraryData *InstrItins;
1370 const MCSubtargetInfo *STI;
1371 DenseMap<InstrStage::FuncUnits, unsigned> Resources;
1373 FuncUnitSorter(
const TargetSubtargetInfo &TSI)
1374 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1379 unsigned minFuncUnits(
const MachineInstr *Inst,
1382 unsigned min = UINT_MAX;
1383 if (InstrItins && !InstrItins->
isEmpty()) {
1384 for (
const InstrStage &IS :
1386 InstrItins->
endStage(SchedClass))) {
1389 if (numAlternatives < min) {
1390 min = numAlternatives;
1397 const MCSchedClassDesc *SCDesc =
1404 for (
const MCWriteProcResEntry &PRE :
1407 if (!PRE.ReleaseAtCycle)
1409 const MCProcResourceDesc *ProcResource =
1411 unsigned NumUnits = ProcResource->
NumUnits;
1412 if (NumUnits < min) {
1414 F = PRE.ProcResourceIdx;
1419 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1427 void calcCriticalResources(MachineInstr &
MI) {
1428 unsigned SchedClass =
MI.getDesc().getSchedClass();
1429 if (InstrItins && !InstrItins->
isEmpty()) {
1430 for (
const InstrStage &IS :
1432 InstrItins->
endStage(SchedClass))) {
1435 Resources[FuncUnits]++;
1440 const MCSchedClassDesc *SCDesc =
1447 for (
const MCWriteProcResEntry &PRE :
1450 if (!PRE.ReleaseAtCycle)
1452 Resources[PRE.ProcResourceIdx]++;
1456 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1460 bool operator()(
const MachineInstr *IS1,
const MachineInstr *IS2)
const {
1462 unsigned MFUs1 = minFuncUnits(IS1, F1);
1463 unsigned MFUs2 = minFuncUnits(IS2, F2);
1466 return MFUs1 > MFUs2;
1471class HighRegisterPressureDetector {
1472 MachineBasicBlock *OrigMBB;
1473 const MachineRegisterInfo &
MRI;
1474 const TargetRegisterInfo *
TRI;
1476 const unsigned PSetNum;
1482 std::vector<unsigned> InitSetPressure;
1486 std::vector<unsigned> PressureSetLimit;
1488 DenseMap<MachineInstr *, RegisterOperands> ROMap;
1490 using Instr2LastUsesTy = DenseMap<MachineInstr *, SmallDenseSet<Register, 4>>;
1493 using OrderedInstsTy = std::vector<MachineInstr *>;
1494 using Instr2StageTy = DenseMap<MachineInstr *, unsigned>;
1497 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1498 if (Pressures.size() == 0) {
1502 for (
unsigned P : Pressures) {
1512 for (
auto PSetIter =
MRI.getPressureSets(
Reg); PSetIter.isValid();
1514 dbgs() << *PSetIter <<
' ';
1519 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1521 auto PSetIter =
MRI.getPressureSets(
Reg);
1522 unsigned Weight = PSetIter.getWeight();
1523 for (; PSetIter.isValid(); ++PSetIter)
1524 Pressure[*PSetIter] += Weight;
1527 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1529 auto PSetIter =
MRI.getPressureSets(
Reg);
1530 unsigned Weight = PSetIter.getWeight();
1531 for (; PSetIter.isValid(); ++PSetIter) {
1532 auto &
P = Pressure[*PSetIter];
1534 "register pressure must be greater than or equal weight");
1556 void computeLiveIn() {
1557 DenseSet<Register>
Used;
1558 for (
auto &
MI : *OrigMBB) {
1559 if (
MI.isDebugInstr())
1561 for (
auto &Use : ROMap[&
MI].
Uses) {
1567 if (isReservedRegister(
Reg))
1569 if (isDefinedInThisLoop(
Reg))
1575 for (
auto LiveIn : Used)
1576 increaseRegisterPressure(InitSetPressure, LiveIn);
1580 void computePressureSetLimit(
const RegisterClassInfo &RCI) {
1581 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1596 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1597 Instr2StageTy &Stages)
const {
1602 DenseSet<Register> TargetRegs;
1603 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1604 if (isDefinedInThisLoop(
Reg))
1607 for (MachineInstr *
MI : OrderedInsts) {
1610 UpdateTargetRegs(
Reg);
1612 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses)
1613 UpdateTargetRegs(
Use.RegUnit);
1617 const auto InstrScore = [&Stages](MachineInstr *
MI) {
1618 return Stages[
MI] +
MI->isPHI();
1621 DenseMap<Register, MachineInstr *> LastUseMI;
1623 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses) {
1629 MachineInstr *Orig = Ite->second;
1630 MachineInstr *
New =
MI;
1631 if (InstrScore(Orig) < InstrScore(New))
1637 Instr2LastUsesTy LastUses;
1638 for (
auto &Entry : LastUseMI)
1655 std::vector<unsigned>
1656 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1657 Instr2StageTy &Stages,
1658 const unsigned StageCount)
const {
1659 using RegSetTy = SmallDenseSet<Register, 16>;
1665 auto CurSetPressure = InitSetPressure;
1666 auto MaxSetPressure = InitSetPressure;
1667 auto LastUses = computeLastUses(OrderedInsts, Stages);
1670 dbgs() <<
"Ordered instructions:\n";
1671 for (MachineInstr *
MI : OrderedInsts) {
1672 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1677 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1687 increaseRegisterPressure(CurSetPressure,
Reg);
1691 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1697 if (!RegSet.contains(
Reg))
1702 decreaseRegisterPressure(CurSetPressure,
Reg);
1706 for (
unsigned I = 0;
I < StageCount;
I++) {
1707 for (MachineInstr *
MI : OrderedInsts) {
1708 const auto Stage = Stages[
MI];
1712 const unsigned Iter =
I - Stage;
1714 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1715 InsertReg(LiveRegSets[Iter],
Def.RegUnit);
1717 for (
auto LastUse : LastUses[
MI]) {
1720 EraseReg(LiveRegSets[Iter - 1], LastUse);
1722 EraseReg(LiveRegSets[Iter], LastUse);
1726 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1727 MaxSetPressure[PSet] =
1728 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1731 dbgs() <<
"CurSetPressure=";
1732 dumpRegisterPressures(CurSetPressure);
1733 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1739 return MaxSetPressure;
1743 HighRegisterPressureDetector(MachineBasicBlock *OrigMBB,
1744 const MachineFunction &MF)
1745 : OrigMBB(OrigMBB),
MRI(MF.getRegInfo()),
1746 TRI(MF.getSubtarget().getRegisterInfo()),
1747 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1748 PressureSetLimit(PSetNum, 0) {}
1752 void init(
const RegisterClassInfo &RCI) {
1753 for (MachineInstr &
MI : *OrigMBB) {
1754 if (
MI.isDebugInstr())
1756 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1760 computePressureSetLimit(RCI);
1765 bool detect(
const SwingSchedulerDAG *SSD, SMSchedule &Schedule,
1766 const unsigned MaxStage)
const {
1768 "the percentage of the margin must be between 0 to 100");
1770 OrderedInstsTy OrderedInsts;
1771 Instr2StageTy Stages;
1773 const auto MaxSetPressure =
1774 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1777 dbgs() <<
"Dump MaxSetPressure:\n";
1778 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1779 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1784 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1785 unsigned Limit = PressureSetLimit[PSet];
1788 <<
" Margin=" << Margin <<
"\n");
1789 if (Limit < MaxSetPressure[PSet] + Margin) {
1792 <<
"Rejected the schedule because of too high register pressure\n");
1808unsigned SwingSchedulerDAG::calculateResMII() {
1810 ResourceManager
RM(&MF.getSubtarget(),
this);
1811 return RM.calculateResMII();
1820unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1821 unsigned RecMII = 0;
1823 for (NodeSet &Nodes : NodeSets) {
1827 unsigned Delay = Nodes.getLatency();
1828 unsigned Distance = 1;
1831 unsigned CurMII = (Delay + Distance - 1) / Distance;
1832 Nodes.setRecMII(CurMII);
1833 if (CurMII > RecMII)
1841void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1842 SwingSchedulerDAG *DAG) {
1843 BitVector
Added(SUnits.size());
1844 DenseMap<int, int> OutputDeps;
1845 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1848 for (
auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
1851 if (OE.isOutputDep()) {
1852 int N = OE.getDst()->NodeNum;
1854 auto Dep = OutputDeps.
find(BackEdge);
1855 if (Dep != OutputDeps.
end()) {
1856 BackEdge = Dep->second;
1857 OutputDeps.
erase(Dep);
1859 OutputDeps[
N] = BackEdge;
1862 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
1874 int N = OE.getDst()->NodeNum;
1876 AdjK[i].push_back(
N);
1882 for (
auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
1883 SUnit *Src =
IE.getSrc();
1884 SUnit *Dst =
IE.getDst();
1887 if (
IE.isOrderDep() && Src->getInstr()->mayLoad()) {
1888 int N = Src->NodeNum;
1890 AdjK[i].push_back(
N);
1897 for (
auto &OD : OutputDeps)
1898 if (!
Added.test(OD.second)) {
1899 AdjK[OD.first].push_back(OD.second);
1900 Added.set(OD.second);
1906bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
1907 const SwingSchedulerDAG *DAG,
1909 SUnit *SV = &SUnits[
V];
1914 for (
auto W : AdjK[V]) {
1915 if (NumPaths > MaxPaths)
1926 if (!Blocked.test(W)) {
1927 if (circuit(W, S, NodeSets, DAG,
1928 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
1936 for (
auto W : AdjK[V]) {
1947void SwingSchedulerDAG::Circuits::unblock(
int U) {
1949 SmallPtrSet<SUnit *, 4> &BU =
B[
U];
1950 while (!BU.
empty()) {
1951 SmallPtrSet<SUnit *, 4>::iterator
SI = BU.
begin();
1952 assert(SI != BU.
end() &&
"Invalid B set.");
1955 if (Blocked.test(
W->NodeNum))
1956 unblock(
W->NodeNum);
1962void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1963 Circuits Cir(SUnits, Topo);
1965 Cir.createAdjacencyStructure(
this);
1966 for (
int I = 0,
E = SUnits.size();
I !=
E; ++
I) {
1968 Cir.circuit(
I,
I, NodeSets,
this);
1990void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
1991 for (SUnit &SU : DAG->
SUnits) {
2001 for (
auto &Dep : SU.
Preds) {
2002 SUnit *TmpSU = Dep.getSUnit();
2003 MachineInstr *TmpMI = TmpSU->
getInstr();
2014 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
2022 for (
auto &Dep : PHISUs[Index]->Succs) {
2026 SUnit *TmpSU = Dep.getSUnit();
2027 MachineInstr *TmpMI = TmpSU->
getInstr();
2036 if (UseSUs.
size() == 0)
2041 for (
auto *
I : UseSUs) {
2042 for (
auto *Src : SrcSUs) {
2058void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
2059 ScheduleInfo.resize(SUnits.size());
2062 for (
int I : Topo) {
2063 const SUnit &SU = SUnits[
I];
2070 for (
int I : Topo) {
2072 int zeroLatencyDepth = 0;
2073 SUnit *SU = &SUnits[
I];
2074 for (
const auto &IE : DDG->getInEdges(SU)) {
2075 SUnit *Pred =
IE.getSrc();
2076 if (
IE.getLatency() == 0)
2078 std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1);
2079 if (
IE.ignoreDependence(
true))
2081 asap = std::max(asap, (
int)(getASAP(Pred) +
IE.getLatency() -
2082 IE.getDistance() * MII));
2084 maxASAP = std::max(maxASAP, asap);
2085 ScheduleInfo[
I].ASAP = asap;
2086 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
2092 int zeroLatencyHeight = 0;
2093 SUnit *SU = &SUnits[
I];
2094 for (
const auto &OE : DDG->getOutEdges(SU)) {
2095 SUnit *Succ = OE.getDst();
2098 if (OE.getLatency() == 0)
2100 std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1);
2101 if (OE.ignoreDependence(
true))
2103 alap = std::min(alap, (
int)(getALAP(Succ) - OE.getLatency() +
2104 OE.getDistance() * MII));
2107 ScheduleInfo[
I].ALAP = alap;
2108 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
2112 for (NodeSet &
I : NodeSets)
2113 I.computeNodeSetInfo(
this);
2116 for (
unsigned i = 0; i < SUnits.size(); i++) {
2117 dbgs() <<
"\tNode " << i <<
":\n";
2118 dbgs() <<
"\t ASAP = " << getASAP(&SUnits[i]) <<
"\n";
2119 dbgs() <<
"\t ALAP = " << getALAP(&SUnits[i]) <<
"\n";
2120 dbgs() <<
"\t MOV = " << getMOV(&SUnits[i]) <<
"\n";
2121 dbgs() <<
"\t D = " << getDepth(&SUnits[i]) <<
"\n";
2122 dbgs() <<
"\t H = " << getHeight(&SUnits[i]) <<
"\n";
2123 dbgs() <<
"\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) <<
"\n";
2124 dbgs() <<
"\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) <<
"\n";
2139 SUnit *PredSU = IE.getSrc();
2140 if (S && S->count(PredSU) == 0)
2142 if (IE.ignoreDependence(
true))
2153 SUnit *SuccSU = OE.getDst();
2154 if (!OE.isAntiDep())
2156 if (S && S->count(SuccSU) == 0)
2162 return !Preds.
empty();
2175 SUnit *SuccSU = OE.getDst();
2176 if (S && S->count(SuccSU) == 0)
2178 if (OE.ignoreDependence(
false))
2189 SUnit *PredSU = IE.getSrc();
2190 if (!IE.isAntiDep())
2192 if (S && S->count(PredSU) == 0)
2198 return !Succs.
empty();
2214 if (!Visited.
insert(Cur).second)
2215 return Path.contains(Cur);
2216 bool FoundPath =
false;
2218 if (!OE.ignoreDependence(
false))
2220 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
2222 if (IE.isAntiDep() && IE.getDistance() == 0)
2224 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
2239 for (
SUnit *SU : NS) {
2245 if (
Reg.isVirtual())
2247 else if (
MRI.isAllocatable(
Reg))
2248 Uses.insert_range(
TRI->regunits(
Reg.asMCReg()));
2251 for (
SUnit *SU : NS)
2255 if (
Reg.isVirtual()) {
2258 }
else if (
MRI.isAllocatable(
Reg)) {
2260 if (!
Uses.count(Unit))
2269void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2270 for (
auto &NS : NodeSets) {
2274 IntervalPressure RecRegPressure;
2275 RegPressureTracker RecRPTracker(RecRegPressure);
2276 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(),
false,
true);
2278 RecRPTracker.closeBottom();
2280 std::vector<SUnit *> SUnits(NS.begin(), NS.end());
2281 llvm::sort(SUnits, [](
const SUnit *
A,
const SUnit *
B) {
2282 return A->NodeNum >
B->NodeNum;
2285 for (
auto &SU : SUnits) {
2291 RecRPTracker.setPos(std::next(CurInstI));
2293 RegPressureDelta RPDelta;
2295 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2300 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2303 NS.setExceedPressure(SU);
2306 RecRPTracker.recede();
2313void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2314 unsigned Colocate = 0;
2315 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2317 SmallSetVector<SUnit *, 8>
S1;
2320 for (
int j = i + 1;
j <
e; ++
j) {
2324 SmallSetVector<SUnit *, 8> S2;
2341void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2346 for (
auto &NS : NodeSets) {
2347 if (NS.getRecMII() > 2)
2349 if (NS.getMaxDepth() > MII)
2358void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2359 SetVector<SUnit *> NodesAdded;
2360 SmallPtrSet<SUnit *, 8> Visited;
2363 for (NodeSet &
I : NodeSets) {
2364 SmallSetVector<SUnit *, 8>
N;
2367 SetVector<SUnit *>
Path;
2368 for (SUnit *NI :
N) {
2370 computePath(NI, Path, NodesAdded,
I, Visited, DDG.get());
2377 if (
succ_L(NodesAdded,
N, DDG.get())) {
2378 SetVector<SUnit *>
Path;
2379 for (SUnit *NI :
N) {
2381 computePath(NI, Path,
I, NodesAdded, Visited, DDG.get());
2392 SmallSetVector<SUnit *, 8>
N;
2393 if (
succ_L(NodesAdded,
N, DDG.get()))
2395 addConnectedNodes(
I, NewSet, NodesAdded);
2396 if (!NewSet.
empty())
2397 NodeSets.push_back(NewSet);
2402 if (
pred_L(NodesAdded,
N, DDG.get()))
2404 addConnectedNodes(
I, NewSet, NodesAdded);
2405 if (!NewSet.
empty())
2406 NodeSets.push_back(NewSet);
2410 for (SUnit &SU : SUnits) {
2411 if (NodesAdded.
count(&SU) == 0) {
2413 addConnectedNodes(&SU, NewSet, NodesAdded);
2414 if (!NewSet.
empty())
2415 NodeSets.push_back(NewSet);
2421void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2422 SetVector<SUnit *> &NodesAdded) {
2425 for (
auto &OE : DDG->getOutEdges(SU)) {
2427 if (!OE.isArtificial() && !
Successor->isBoundaryNode() &&
2429 addConnectedNodes(
Successor, NewSet, NodesAdded);
2431 for (
auto &IE : DDG->getInEdges(SU)) {
2432 SUnit *Predecessor =
IE.getSrc();
2433 if (!
IE.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2434 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2443 for (
SUnit *SU : Set1) {
2444 if (Set2.
count(SU) != 0)
2447 return !Result.empty();
2451void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2452 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2455 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2460 for (SUnit *SU : *J)
2472void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2473 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2475 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2476 J->remove_if([&](SUnit *SUJ) {
return I->count(SUJ); });
2491void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2492 SmallSetVector<SUnit *, 8>
R;
2495 for (
auto &Nodes : NodeSets) {
2498 SmallSetVector<SUnit *, 8>
N;
2513 }
else if (NodeSets.size() == 1) {
2514 for (
const auto &
N : Nodes)
2515 if (
N->Succs.size() == 0)
2521 SUnit *maxASAP =
nullptr;
2522 for (SUnit *SU : Nodes) {
2523 if (maxASAP ==
nullptr || getASAP(SU) > getASAP(maxASAP) ||
2524 (getASAP(SU) == getASAP(maxASAP) && SU->
NodeNum > maxASAP->
NodeNum))
2532 while (!
R.empty()) {
2533 if (Order == TopDown) {
2537 while (!
R.empty()) {
2538 SUnit *maxHeight =
nullptr;
2539 for (SUnit *
I : R) {
2540 if (maxHeight ==
nullptr || getHeight(
I) > getHeight(maxHeight))
2542 else if (getHeight(
I) == getHeight(maxHeight) &&
2543 getZeroLatencyHeight(
I) > getZeroLatencyHeight(maxHeight))
2545 else if (getHeight(
I) == getHeight(maxHeight) &&
2546 getZeroLatencyHeight(
I) ==
2547 getZeroLatencyHeight(maxHeight) &&
2548 getMOV(
I) < getMOV(maxHeight))
2553 R.remove(maxHeight);
2554 for (
const auto &OE : DDG->getOutEdges(maxHeight)) {
2555 SUnit *SU = OE.getDst();
2556 if (Nodes.count(SU) == 0)
2560 if (OE.ignoreDependence(
false))
2569 for (
const auto &IE : DDG->getInEdges(maxHeight)) {
2570 SUnit *SU =
IE.getSrc();
2571 if (!
IE.isAntiDep())
2573 if (Nodes.count(SU) == 0)
2582 SmallSetVector<SUnit *, 8>
N;
2589 while (!
R.empty()) {
2590 SUnit *maxDepth =
nullptr;
2591 for (SUnit *
I : R) {
2592 if (maxDepth ==
nullptr || getDepth(
I) > getDepth(maxDepth))
2594 else if (getDepth(
I) == getDepth(maxDepth) &&
2595 getZeroLatencyDepth(
I) > getZeroLatencyDepth(maxDepth))
2597 else if (getDepth(
I) == getDepth(maxDepth) &&
2598 getZeroLatencyDepth(
I) == getZeroLatencyDepth(maxDepth) &&
2599 getMOV(
I) < getMOV(maxDepth))
2605 if (Nodes.isExceedSU(maxDepth)) {
2608 R.insert(Nodes.getNode(0));
2611 for (
const auto &IE : DDG->getInEdges(maxDepth)) {
2612 SUnit *SU =
IE.getSrc();
2613 if (Nodes.count(SU) == 0)
2624 for (
const auto &OE : DDG->getOutEdges(maxDepth)) {
2625 SUnit *SU = OE.getDst();
2626 if (!OE.isAntiDep())
2628 if (Nodes.count(SU) == 0)
2637 SmallSetVector<SUnit *, 8>
N;
2646 dbgs() <<
"Node order: ";
2648 dbgs() <<
" " <<
I->NodeNum <<
" ";
2655bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2662 bool scheduleFound =
false;
2663 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2666 std::make_unique<HighRegisterPressureDetector>(Loop.getHeader(), MF);
2667 HRPDetector->init(RegClassInfo);
2670 for (
unsigned II = MII;
II <= MAX_II && !scheduleFound; ++
II) {
2682 int EarlyStart = INT_MIN;
2683 int LateStart = INT_MAX;
2692 dbgs() <<
format(
"\tes: %8x ls: %8x\n", EarlyStart, LateStart));
2694 if (EarlyStart > LateStart)
2695 scheduleFound =
false;
2696 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
2698 Schedule.
insert(SU, EarlyStart, EarlyStart + (
int)
II - 1,
II);
2699 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
2701 Schedule.
insert(SU, LateStart, LateStart - (
int)
II + 1,
II);
2702 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2703 LateStart = std::min(LateStart, EarlyStart + (
int)
II - 1);
2712 scheduleFound = Schedule.
insert(SU, LateStart, EarlyStart,
II);
2714 scheduleFound = Schedule.
insert(SU, EarlyStart, LateStart,
II);
2717 scheduleFound = Schedule.
insert(SU, FirstCycle + getASAP(SU),
2718 FirstCycle + getASAP(SU) +
II - 1,
II);
2726 scheduleFound =
false;
2730 dbgs() <<
"\tCan't schedule\n";
2732 }
while (++NI != NE && scheduleFound);
2737 scheduleFound = DDG->isValidSchedule(Schedule);
2759 if (scheduleFound) {
2760 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*
this, Schedule);
2765 if (scheduleFound) {
2767 Pass.ORE->emit([&]() {
2768 return MachineOptimizationRemarkAnalysis(
2769 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
2770 <<
"Schedule found with Initiation Interval: "
2772 <<
", MaxStageCount: "
2786 if (!
Reg.isVirtual())
2788 if (
MRI.getVRegDef(
Reg)->getParent() !=
MI.getParent())
2800 if (!
Op.isReg() || !
Op.getReg().isVirtual())
2828 if (Def->getParent() != LoopBB)
2831 if (Def->isCopy()) {
2833 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
2835 CurReg = Def->getOperand(1).getReg();
2836 }
else if (Def->isPHI()) {
2842 }
else if (
TII->getIncrementValue(*Def,
Value)) {
2850 bool OffsetIsScalable;
2851 if (
TII->getMemOperandWithOffset(*Def, BaseOp,
Offset, OffsetIsScalable,
2854 CurReg = BaseOp->
getReg();
2866 if (CurReg == OrgReg)
2870 if (!Phi || !Increment)
2878bool SwingSchedulerDAG::computeDelta(
const MachineInstr &
MI,
int &Delta)
const {
2879 const TargetRegisterInfo *
TRI = MF.getSubtarget().getRegisterInfo();
2880 const MachineOperand *BaseOp;
2882 bool OffsetIsScalable;
2883 if (!
TII->getMemOperandWithOffset(
MI, BaseOp,
Offset, OffsetIsScalable,
TRI))
2887 if (OffsetIsScalable)
2890 if (!BaseOp->
isReg())
2903bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *
MI,
2905 unsigned &OffsetPos,
2911 unsigned BasePosLd, OffsetPosLd;
2917 MachineRegisterInfo &
MRI =
MI->getMF()->getRegInfo();
2918 MachineInstr *
Phi =
MRI.getVRegDef(BaseReg);
2919 if (!Phi || !
Phi->isPHI())
2927 MachineInstr *PrevDef =
MRI.getVRegDef(PrevReg);
2928 if (!PrevDef || PrevDef ==
MI)
2934 unsigned BasePos1 = 0, OffsetPos1 = 0;
2940 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
2942 MachineInstr *NewMI = MF.CloneMachineInstr(
MI);
2945 MF.deleteMachineInstr(NewMI);
2950 BasePos = BasePosLd;
2951 OffsetPos = OffsetPosLd;
2963 InstrChanges.find(SU);
2964 if (It != InstrChanges.
end()) {
2965 std::pair<Register, int64_t> RegAndOffset = It->second;
2966 unsigned BasePos, OffsetPos;
2967 if (!
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
2969 Register BaseReg =
MI->getOperand(BasePos).getReg();
2975 if (BaseStageNum < DefStageNum) {
2977 int OffsetDiff = DefStageNum - BaseStageNum;
2978 if (DefCycleNum < BaseCycleNum) {
2984 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2999 while (Def->isPHI()) {
3000 if (!Visited.
insert(Def).second)
3002 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3003 if (Def->getOperand(i + 1).getMBB() == BB) {
3004 Def =
MRI.getVRegDef(Def->getOperand(i).getReg());
3015 int DeltaB, DeltaO, Delta;
3022 int64_t OffsetB, OffsetO;
3023 bool OffsetBIsScalable, OffsetOIsScalable;
3025 if (!
TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,
3026 OffsetBIsScalable,
TRI) ||
3027 !
TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,
3028 OffsetOIsScalable,
TRI))
3031 if (OffsetBIsScalable || OffsetOIsScalable)
3041 if (!RegB.
isVirtual() || !RegO.isVirtual())
3046 if (!DefB || !DefO || !DefB->
isPHI() || !DefO->
isPHI())
3071 dbgs() <<
"Overlap check:\n";
3072 dbgs() <<
" BaseMI: ";
3074 dbgs() <<
" Base + " << OffsetB <<
" + I * " << Delta
3075 <<
", Len: " << AccessSizeB.
getValue() <<
"\n";
3076 dbgs() <<
" OtherMI: ";
3078 dbgs() <<
" Base + " << OffsetO <<
" + I * " << Delta
3079 <<
", Len: " << AccessSizeO.
getValue() <<
"\n";
3087 int64_t BaseMinAddr = OffsetB;
3088 int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.
getValue() - 1;
3089 if (BaseMinAddr > OhterNextIterMaxAddr) {
3094 int64_t BaseMaxAddr = OffsetB + AccessSizeB.
getValue() - 1;
3095 int64_t OtherNextIterMinAddr = OffsetO + Delta;
3096 if (BaseMaxAddr < OtherNextIterMinAddr) {
3110 if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() ||
3111 Edge.getDst()->isBoundaryNode())
3117 if (Edge.isOutputDep())
3122 assert(
SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
3139void SwingSchedulerDAG::postProcessDAG() {
3140 for (
auto &M : Mutations)
3150 bool forward =
true;
3152 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
3153 << EndCycle <<
" II: " <<
II <<
"\n";
3155 if (StartCycle > EndCycle)
3159 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3160 for (
int curCycle = StartCycle; curCycle != termCycle;
3161 forward ? ++curCycle : --curCycle) {
3164 ProcItinResources.canReserveResources(*SU, curCycle)) {
3166 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
3171 ProcItinResources.reserveResources(*SU, curCycle);
3172 ScheduledInstrs[curCycle].push_back(SU);
3173 InstrToCycle.insert(std::make_pair(SU, curCycle));
3174 if (curCycle > LastCycle)
3175 LastCycle = curCycle;
3176 if (curCycle < FirstCycle)
3177 FirstCycle = curCycle;
3181 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
3194 int EarlyCycle = INT_MAX;
3195 while (!Worklist.
empty()) {
3198 if (Visited.
count(PrevSU))
3200 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3201 if (it == InstrToCycle.end())
3203 EarlyCycle = std::min(EarlyCycle, it->second);
3204 for (
const auto &IE : DDG->
getInEdges(PrevSU))
3205 if (IE.isOrderDep() || IE.isOutputDep())
3218 int LateCycle = INT_MIN;
3219 while (!Worklist.
empty()) {
3224 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3225 if (it == InstrToCycle.end())
3227 LateCycle = std::max(LateCycle, it->second);
3229 if (OE.isOrderDep() || OE.isOutputDep())
3240 for (
auto &
P : SU->
Preds)
3241 if (
P.getKind() ==
SDep::Anti &&
P.getSUnit()->getInstr()->isPHI())
3242 for (
auto &S :
P.getSUnit()->Succs)
3243 if (S.getKind() ==
SDep::Data && S.getSUnit()->getInstr()->isPHI())
3244 return P.getSUnit();
3257 for (
int cycle =
getFirstCycle(); cycle <= LastCycle; ++cycle) {
3260 if (IE.getSrc() ==
I) {
3265 *MinLateStart = std::min(*MinLateStart, End);
3267 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() *
II;
3268 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3273 if (OE.getDst() ==
I) {
3278 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
3280 int LateStart = cycle - OE.getLatency() + OE.getDistance() *
II;
3281 *MinLateStart = std::min(*MinLateStart, LateStart);
3286 for (
const auto &Dep : SU->
Preds) {
3289 if (BE && Dep.getSUnit() == BE && !SU->
getInstr()->
isPHI() &&
3291 *MinLateStart = std::min(*MinLateStart, cycle);
3301 std::deque<SUnit *> &Insts)
const {
3303 bool OrderBeforeUse =
false;
3304 bool OrderAfterDef =
false;
3305 bool OrderBeforeDef =
false;
3306 unsigned MoveDef = 0;
3307 unsigned MoveUse = 0;
3312 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
3315 if (!MO.isReg() || !MO.getReg().isVirtual())
3319 unsigned BasePos, OffsetPos;
3320 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
3321 if (
MI->getOperand(BasePos).getReg() == Reg)
3325 std::tie(Reads, Writes) =
3326 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3328 OrderBeforeUse =
true;
3333 OrderAfterDef =
true;
3335 }
else if (MO.isUse() && Writes &&
stageScheduled(*
I) == StageInst1) {
3337 OrderBeforeUse =
true;
3341 OrderAfterDef =
true;
3345 OrderBeforeUse =
true;
3349 OrderAfterDef =
true;
3354 OrderBeforeUse =
true;
3360 OrderBeforeDef =
true;
3368 if (OE.getDst() != *
I)
3371 OrderBeforeUse =
true;
3378 else if ((OE.isAntiDep() || OE.isOutputDep()) &&
3380 OrderBeforeUse =
true;
3381 if ((MoveUse == 0) || (Pos < MoveUse))
3386 if (IE.getSrc() != *
I)
3388 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
3390 OrderAfterDef =
true;
3397 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3398 OrderBeforeUse =
false;
3403 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3407 if (OrderBeforeUse && OrderAfterDef) {
3408 SUnit *UseSU = Insts.at(MoveUse);
3409 SUnit *DefSU = Insts.at(MoveDef);
3410 if (MoveUse > MoveDef) {
3411 Insts.erase(Insts.begin() + MoveUse);
3412 Insts.erase(Insts.begin() + MoveDef);
3414 Insts.erase(Insts.begin() + MoveDef);
3415 Insts.erase(Insts.begin() + MoveUse);
3425 Insts.push_front(SU);
3427 Insts.push_back(SU);
3435 assert(Phi.isPHI() &&
"Expecting a Phi.");
3442 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3450 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3468 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3474 if (DMO.getReg() == LoopReg)
3485 if (InstrToCycle.count(IE.getSrc()))
3496 for (
auto &SU : SSD->
SUnits)
3501 while (!Worklist.
empty()) {
3503 if (DoNotPipeline.
count(SU))
3506 DoNotPipeline.
insert(SU);
3513 if (OE.getDistance() == 1)
3516 return DoNotPipeline;
3525 int NewLastCycle = INT_MIN;
3530 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3537 if (IE.getDistance() == 0)
3538 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
3543 if (OE.getDistance() == 1)
3544 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);
3546 int OldCycle = InstrToCycle[&SU];
3547 if (OldCycle != NewCycle) {
3548 InstrToCycle[&SU] = NewCycle;
3553 <<
") is not pipelined; moving from cycle " << OldCycle
3554 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3579 if (FirstCycle + InitiationInterval <= NewCycle)
3582 NewLastCycle = std::max(NewLastCycle, NewCycle);
3584 LastCycle = NewLastCycle;
3601 int CycleDef = InstrToCycle[&SU];
3602 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3604 SUnit *Dst = OE.getDst();
3605 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
3606 if (OE.getReg().isPhysical()) {
3609 if (InstrToCycle[Dst] <= CycleDef)
3627void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3630 typedef std::pair<SUnit *, unsigned> UnitIndex;
3631 std::vector<UnitIndex> Indices(
NodeOrder.size(), std::make_pair(
nullptr, 0));
3633 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i)
3634 Indices.push_back(std::make_pair(
NodeOrder[i], i));
3636 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3637 return std::get<0>(i1) < std::get<0>(i2);
3650 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i) {
3654 bool PredBefore =
false;
3655 bool SuccBefore =
false;
3662 for (
const auto &IE : DDG->getInEdges(SU)) {
3663 SUnit *PredSU = IE.getSrc();
3664 unsigned PredIndex = std::get<1>(
3673 for (
const auto &OE : DDG->getOutEdges(SU)) {
3674 SUnit *SuccSU = OE.getDst();
3680 unsigned SuccIndex = std::get<1>(
3693 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3698 NumNodeOrderIssues++;
3702 <<
" are scheduled before node " << SU->
NodeNum
3709 dbgs() <<
"Invalid node order found!\n";
3722 for (
SUnit *SU : Instrs) {
3724 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3732 InstrChanges.find(SU);
3733 if (It != InstrChanges.
end()) {
3734 unsigned BasePos, OffsetPos;
3736 if (
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos)) {
3740 MI->getOperand(OffsetPos).getImm() - It->second.second;
3753 unsigned TiedUseIdx = 0;
3754 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3756 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3758 NewBaseReg =
MI->getOperand(i).getReg();
3767 const std::deque<SUnit *> &Instrs)
const {
3768 std::deque<SUnit *> NewOrderPhi;
3769 for (
SUnit *SU : Instrs) {
3771 NewOrderPhi.push_back(SU);
3773 std::deque<SUnit *> NewOrderI;
3774 for (
SUnit *SU : Instrs) {
3790 std::deque<SUnit *> &cycleInstrs =
3791 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3793 ScheduledInstrs[cycle].push_front(SU);
3799 for (
int cycle =
getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3800 ScheduledInstrs.erase(cycle);
3810 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3819 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3820 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3821 for (
const auto &
I : Nodes)
3822 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3826#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3833 for (
SUnit *CI : cycleInstrs->second) {
3835 os <<
"(" << CI->
NodeNum <<
") ";
3846void ResourceManager::dumpMRT()
const {
3850 std::stringstream SS;
3852 SS << std::setw(4) <<
"Slot";
3853 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3854 SS << std::setw(3) <<
I;
3855 SS << std::setw(7) <<
"#Mops"
3857 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3858 SS << std::setw(4) << Slot;
3859 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3860 SS << std::setw(3) << MRT[Slot][
I];
3861 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3870 unsigned ProcResourceID = 0;
3874 assert(SM.getNumProcResourceKinds() < 64 &&
3875 "Too many kinds of resources, unsupported");
3878 Masks.
resize(SM.getNumProcResourceKinds());
3879 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3881 if (
Desc.SubUnitsIdxBegin)
3883 Masks[
I] = 1ULL << ProcResourceID;
3887 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3889 if (!
Desc.SubUnitsIdxBegin)
3891 Masks[
I] = 1ULL << ProcResourceID;
3892 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3893 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3898 dbgs() <<
"ProcResourceDesc:\n";
3899 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3901 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3902 ProcResource->
Name,
I, Masks[
I],
3905 dbgs() <<
" -----------------\n";
3913 dbgs() <<
"canReserveResources:\n";
3916 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3922 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3928 reserveResources(SCDesc,
Cycle);
3929 bool Result = !isOverbooked();
3930 unreserveResources(SCDesc,
Cycle);
3939 dbgs() <<
"reserveResources:\n";
3942 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3948 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3954 reserveResources(SCDesc,
Cycle);
3959 dbgs() <<
"reserveResources: done!\n\n";
3970 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3973 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3982 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3985 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3988bool ResourceManager::isOverbooked()
const {
3990 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
3991 for (
unsigned I = 1,
E = SM.getNumProcResourceKinds();
I <
E; ++
I) {
3992 const MCProcResourceDesc *
Desc = SM.getProcResource(
I);
3993 if (MRT[Slot][
I] >
Desc->NumUnits)
3996 if (NumScheduledMops[Slot] > IssueWidth)
4002int ResourceManager::calculateResMIIDFA()
const {
4007 FuncUnitSorter FUS = FuncUnitSorter(*ST);
4008 for (SUnit &SU : DAG->
SUnits)
4009 FUS.calcCriticalResources(*SU.
getInstr());
4010 PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
4013 for (SUnit &SU : DAG->
SUnits)
4020 while (!FuncUnitOrder.empty()) {
4021 MachineInstr *
MI = FuncUnitOrder.top();
4022 FuncUnitOrder.pop();
4023 if (
TII->isZeroCost(
MI->getOpcode()))
4029 unsigned ReservedCycles = 0;
4030 auto *RI = Resources.
begin();
4031 auto *RE = Resources.
end();
4033 dbgs() <<
"Trying to reserve resource for " << NumCycles
4034 <<
" cycles for \n";
4037 for (
unsigned C = 0;
C < NumCycles; ++
C)
4039 if ((*RI)->canReserveResources(*
MI)) {
4040 (*RI)->reserveResources(*
MI);
4047 <<
", NumCycles:" << NumCycles <<
"\n");
4049 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
4051 <<
"NewResource created to reserve resources"
4054 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
4055 NewResource->reserveResources(*
MI);
4056 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
4060 int Resmii = Resources.
size();
4067 return calculateResMIIDFA();
4074 for (
SUnit &SU : DAG->SUnits) {
4086 <<
" WriteProcRes: ";
4091 make_range(STI->getWriteProcResBegin(SCDesc),
4092 STI->getWriteProcResEnd(SCDesc))) {
4096 SM.getProcResource(PRE.ProcResourceIdx);
4097 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
4100 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
4105 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
4108 dbgs() <<
"#Mops: " << NumMops <<
", "
4109 <<
"IssueWidth: " << IssueWidth <<
", "
4110 <<
"Cycles: " << Result <<
"\n";
4115 std::stringstream SS;
4116 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
4117 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
4122 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
4124 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
4127 std::stringstream SS;
4128 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
4129 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
4130 << std::setw(10) << Cycles <<
"\n";
4134 if (Cycles > Result)
4141 InitiationInterval =
II;
4142 DFAResources.clear();
4143 DFAResources.resize(
II);
4144 for (
auto &
I : DFAResources)
4145 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
4148 NumScheduledMops.clear();
4149 NumScheduledMops.resize(
II);
4153 if (Pred.isArtificial() || Dst->isBoundaryNode())
4158 return IgnoreAnti && (Pred.getKind() ==
SDep::Kind::Anti || Distance != 0);
4161SwingSchedulerDDG::SwingSchedulerDDGEdges &
4162SwingSchedulerDDG::getEdges(
const SUnit *SU) {
4164 return EntrySUEdges;
4170const SwingSchedulerDDG::SwingSchedulerDDGEdges &
4171SwingSchedulerDDG::getEdges(
const SUnit *SU)
const {
4173 return EntrySUEdges;
4179void SwingSchedulerDDG::addEdge(
const SUnit *SU,
4180 const SwingSchedulerDDGEdge &
Edge) {
4182 "Validation-only edges are not expected here.");
4183 auto &Edges = getEdges(SU);
4184 if (
Edge.getSrc() == SU)
4185 Edges.Succs.push_back(
Edge);
4187 Edges.Preds.push_back(
Edge);
4190void SwingSchedulerDDG::initEdges(SUnit *SU) {
4191 for (
const auto &PI : SU->
Preds) {
4192 SwingSchedulerDDGEdge
Edge(SU, PI,
false,
4197 for (
const auto &SI : SU->
Succs) {
4198 SwingSchedulerDDGEdge
Edge(SU, SI,
true,
4206 : EntrySU(EntrySU), ExitSU(ExitSU) {
4207 EdgesVec.resize(SUnits.size());
4212 for (
auto &SU : SUnits)
4216 for (
SUnit &SU : SUnits) {
4221 for (
SUnit *Dst : *OD) {
4224 Edge.setDistance(1);
4225 ValidationOnlyEdges.push_back(Edge);
4231const SwingSchedulerDDG::EdgesType &
4233 return getEdges(SU).Preds;
4236const SwingSchedulerDDG::EdgesType &
4238 return getEdges(SU).Succs;
4245 auto ExpandCycle = [&](
SUnit *SU) {
4252 SUnit *Src = Edge.getSrc();
4253 SUnit *Dst = Edge.getDst();
4254 if (!Src->isInstr() || !Dst->isInstr())
4256 int CycleSrc = ExpandCycle(Src);
4257 int CycleDst = ExpandCycle(Dst);
4258 int MaxLateStart = CycleDst + Edge.getDistance() *
II - Edge.getLatency();
4259 if (CycleSrc > MaxLateStart) {
4261 dbgs() <<
"Validation failed for edge from " << Src->NodeNum <<
" to "
4262 << Dst->NodeNum <<
"\n";
4272 for (
SUnit &SU : SUnits) {
4301 !
TII->isGlobalMemoryObject(FromMI) &&
4319 const auto DumpSU = [](
const SUnit *SU) {
4320 std::ostringstream OSS;
4321 OSS <<
"SU(" << SU->
NodeNum <<
")";
4325 dbgs() <<
" Loop carried edges from " << DumpSU(SU) <<
"\n"
4327 for (
SUnit *Dst : *Order)
4328 dbgs() <<
" " << DumpSU(Dst) <<
"\n";
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::optional< unsigned > getTag(const TargetRegisterInfo *TRI, const MachineInstr &MI, const LoadInfo &LI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
print mir2vec MIR2Vec Vocabulary Printer Pass
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static bool findLoopIncrementValue(const MachineOperand &Op, int &Value)
When Op is a value that is incremented recursively in a loop and there is a unique instruction that i...
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static cl::opt< unsigned > SwpMaxNumStores("pipeliner-max-num-stores", cl::desc("Maximum number of stores allwed in the target loop."), cl::Hidden, cl::init(200))
A command line argument to limit the number of store instructions in the target basic block.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, BatchAAResults &BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const SwingSchedulerDAG *SSD, bool PerformCheapCheck)
Returns true if there is a loop-carried order dependency from Src to Dst.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
Promote Memory to Register
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
Remove Loads Into Fake Uses
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
Add loop-carried chain dependencies.
void computeDependencies()
The main function to compute loop-carried order-dependencies.
const BitVector & getLoopCarried(unsigned Idx) const
LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, BatchAAResults *BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
AttributeList getAttributes() const
Return the attribute list for this Function.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isPostIncrement(const MachineInstr &MI) const override
Return true for post-incremented instructions.
DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override
Create machine specific model for scheduling.
bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const override
For instructions with a base and offset, return the position of the base register and offset operands...
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
Represents a single loop in the control flow graph.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register, stepping by MachineInstr.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underlying object.
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information that they claim to use by overriding the getAnalysisUsage function.
A reimplementation of ModuloScheduleExpander.
PointerIntPair - This class implements a pair of a pointer and small integer.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind
These are the different kinds of scheduling dependencies.
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is scheduled or the EndCycle is reached.
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SmallPtrSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to an SUnit.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void dump() const override
LLVM_ABI void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
LLVM_ABI bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
typename vector_type::const_iterator iterator
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes to overlap.
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing Modulo Scheduling algorithm.
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
const EdgesType & getInEdges(const SUnit *SU) const
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual bool enableMachinePipeliner() const
True if the subtarget should run MachinePipeliner.
virtual bool useDFAforSMS() const
Default to DFA for resource management, return false when target will use ProcResource in InstrSchedM...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
The main class in the implementation of the target independent window scheduler.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< DefNode * > Def
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
std::set< NodeId > NodeSet
friend class Instruction
Iterator for Instructions in a BasicBlock.
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
@ WS_Force
Use window algorithm instead of SMS algorithm.
@ WS_On
Use window algorithm after SMS algorithm fails.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
unsigned MCRegUnit
Register units are used to compute register aliasing.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
cl::opt< bool > SwpEnableCopyToPhi
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
LLVM_ABI char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instructions and return multiple objects.
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
int popcount(T Value) noexcept
Count the number of set bits in a value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This class holds an SUnit corresponding to a memory operation and other information related to the in...
const Value * MemOpValue
The value of a memory operand.
SmallVector< const Value *, 2 > UnderlyingObjs
bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const
int64_t MemOpOffset
The offset of a memory operand.
bool IsAllIdentified
True if all the underlying objects are identified.
SUnitWithMemInfo(SUnit *SU)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
uint64_t FuncUnits
Bitmask representing a set of functional units.
static constexpr LaneBitmask getNone()
Represents loop-carried dependencies.
SmallSetVector< SUnit *, 8 > OrderDep
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.