72#include "llvm/Config/llvm-config.h"
100#define DEBUG_TYPE "pipeliner"
102STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
103STATISTIC(NumPipelined,
"Number of loops software pipelined");
104STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
105STATISTIC(NumFailBranch,
"Pipeliner abort due to unknown branch");
106STATISTIC(NumFailLoop,
"Pipeliner abort due to unsupported loop");
107STATISTIC(NumFailPreheader,
"Pipeliner abort due to missing preheader");
108STATISTIC(NumFailLargeMaxMII,
"Pipeliner abort due to MaxMII too large");
109STATISTIC(NumFailZeroMII,
"Pipeliner abort due to zero MII");
110STATISTIC(NumFailNoSchedule,
"Pipeliner abort due to no schedule found");
111STATISTIC(NumFailZeroStage,
"Pipeliner abort due to zero stage");
112STATISTIC(NumFailLargeMaxStage,
"Pipeliner abort due to too many stages");
113STATISTIC(NumFailTooManyStores,
"Pipeliner abort due to too many stores");
117 cl::desc(
"Enable Software Pipelining"));
126 cl::desc(
"Size limit for the MII."),
132 cl::desc(
"Force pipeliner to use specified II."),
138 cl::desc(
"Maximum stages allowed in the generated scheduled."),
145 cl::desc(
"Prune dependences between unrelated Phi nodes."),
152 cl::desc(
"Prune loop carried order dependences."),
170 cl::desc(
"Instead of emitting the pipelined code, annotate instructions "
171 "with the generated schedule for feeding into the "
172 "-modulo-schedule-test pass"));
177 "Use the experimental peeling code generator for software pipelining"));
185 cl::desc(
"Limit register pressure of scheduled loop"));
190 cl::desc(
"Margin representing the unused percentage of "
191 "the register pressure limit"));
195 cl::desc(
"Use the MVE code generator for software pipelining"));
200 "pipeliner-max-num-stores",
209 cl::desc(
"Enable CopyToPhi DAG Mutation"));
214 "pipeliner-force-issue-width",
221 cl::desc(
"Set how to use window scheduling algorithm."),
223 "Turn off window algorithm."),
225 "Use window algorithm after SMS algorithm fails."),
227 "Use window algorithm instead of SMS algorithm.")));
231unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
239 "Modulo Software Pipelining",
false,
false)
281 enum class InstrTag {
290 TaggedSUnit(
SUnit *SU, InstrTag Tag)
293 InstrTag
getTag()
const {
return InstrTag(getInt()); }
297 struct LoadStoreChunk {
301 void append(
SUnit *SU);
306 std::vector<SUnit> &SUnits;
312 std::vector<BitVector> LoopCarried;
325 std::vector<TaggedSUnit> TaggedSUnits;
339 return LoopCarried[Idx];
344 std::optional<InstrTag> getInstrTag(
SUnit *SU)
const;
346 void addLoopCarriedDepenenciesForChunks(
const LoadStoreChunk &From,
347 const LoadStoreChunk &To);
358 bool PerformCheapCheck =
false);
360 void computeDependenciesAux();
391 TII =
MF->getSubtarget().getInstrInfo();
394 for (
const auto &L : *
MLI)
404bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
406 for (
const auto &InnerLoop : L)
407 Changed |= scheduleLoop(*InnerLoop);
419 setPragmaPipelineOptions(L);
420 if (!canPipelineLoop(L)) {
424 L.getStartLoc(), L.getHeader())
425 <<
"Failed to pipeline loop";
428 LI.LoopPipelinerInfo.reset();
433 if (useSwingModuloScheduler())
434 Changed = swingModuloScheduler(L);
436 if (useWindowScheduler(
Changed))
437 Changed = runWindowScheduler(L);
439 LI.LoopPipelinerInfo.reset();
443void MachinePipeliner::setPragmaPipelineOptions(
MachineLoop &L) {
448 MachineBasicBlock *LBLK =
L.getTopBlock();
461 MDNode *LoopID = TI->
getMetadata(LLVMContext::MD_loop);
462 if (LoopID ==
nullptr)
479 if (S->
getString() ==
"llvm.loop.pipeline.initiationinterval") {
481 "Pipeline initiation interval hint metadata should have two operands.");
485 }
else if (S->
getString() ==
"llvm.loop.pipeline.disable") {
494bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
495 if (
L.getNumBlocks() != 1) {
497 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
498 L.getStartLoc(),
L.getHeader())
499 <<
"Not a single basic block: "
500 <<
ore::NV(
"NumBlocks",
L.getNumBlocks());
507 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
508 L.getStartLoc(),
L.getHeader())
509 <<
"Disabled by Pragma.";
519 if (
TII->analyzeBranch(*
L.getHeader(),
LI.TBB,
LI.FBB,
LI.BrCond)) {
520 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeBranch, can NOT pipeline Loop\n");
523 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
524 L.getStartLoc(),
L.getHeader())
525 <<
"The branch can't be understood";
530 LI.LoopInductionVar =
nullptr;
531 LI.LoopCompare =
nullptr;
532 LI.LoopPipelinerInfo =
TII->analyzeLoopForPipelining(
L.getTopBlock());
533 if (!
LI.LoopPipelinerInfo) {
534 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeLoop, can NOT pipeline Loop\n");
537 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
538 L.getStartLoc(),
L.getHeader())
539 <<
"The loop structure is not supported";
544 if (!
L.getLoopPreheader()) {
545 LLVM_DEBUG(
dbgs() <<
"Preheader not found, can NOT pipeline Loop\n");
548 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
549 L.getStartLoc(),
L.getHeader())
550 <<
"No loop preheader found";
555 unsigned NumStores = 0;
556 for (MachineInstr &
MI : *
L.getHeader())
561 NumFailTooManyStores++;
563 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
564 L.getStartLoc(),
L.getHeader())
565 <<
"Too many store instructions in the loop: "
566 <<
ore::NV(
"NumStores", NumStores) <<
" > "
573 preprocessPhiNodes(*
L.getHeader());
578 MachineRegisterInfo &
MRI =
MF->getRegInfo();
582 for (MachineInstr &PI :
B.phis()) {
583 MachineOperand &DefOp = PI.getOperand(0);
585 auto *RC =
MRI.getRegClass(DefOp.
getReg());
587 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
588 MachineOperand &RegOp = PI.getOperand(i);
595 MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
612bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
613 assert(
L.getBlocks().size() == 1 &&
"SMS works on single blocks only.");
616 SwingSchedulerDAG SMS(
620 MachineBasicBlock *
MBB =
L.getHeader();
638 return SMS.hasNewSchedule();
652bool MachinePipeliner::runWindowScheduler(
MachineLoop &L) {
660 Context.RegClassInfo->runOnMachineFunction(*
MF);
665bool MachinePipeliner::useSwingModuloScheduler() {
670bool MachinePipeliner::useWindowScheduler(
bool Changed) {
677 "llvm.loop.pipeline.initiationinterval is set.\n");
685void SwingSchedulerDAG::setMII(
unsigned ResMII,
unsigned RecMII) {
688 else if (II_setByPragma > 0)
689 MII = II_setByPragma;
691 MII = std::max(ResMII, RecMII);
694void SwingSchedulerDAG::setMAX_II() {
697 else if (II_setByPragma > 0)
698 MAX_II = II_setByPragma;
708 updatePhiDependences();
709 Topo.InitDAGTopologicalSorting();
715 dbgs() <<
"===== Loop Carried Edges Begin =====\n";
718 dbgs() <<
"===== Loop Carried Edges End =====\n";
721 NodeSetType NodeSets;
722 findCircuits(NodeSets);
723 NodeSetType Circuits = NodeSets;
726 unsigned ResMII = calculateResMII();
727 unsigned RecMII = calculateRecMII(NodeSets);
735 setMII(ResMII, RecMII);
739 <<
" (rec=" << RecMII <<
", res=" << ResMII <<
")\n");
745 Pass.ORE->emit([&]() {
747 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
748 <<
"Invalid Minimal Initiation Interval: 0";
756 <<
", we don't pipeline large loops\n");
757 NumFailLargeMaxMII++;
758 Pass.ORE->emit([&]() {
760 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
761 <<
"Minimal Initiation Interval too large: "
762 <<
ore::NV(
"MII", (
int)MII) <<
" > "
764 <<
"Refer to -pipeliner-max-mii.";
769 computeNodeFunctions(NodeSets);
771 registerPressureFilter(NodeSets);
773 colocateNodeSets(NodeSets);
775 checkNodeSets(NodeSets);
778 for (
auto &
I : NodeSets) {
779 dbgs() <<
" Rec NodeSet ";
786 groupRemainingNodes(NodeSets);
788 removeDuplicateNodes(NodeSets);
791 for (
auto &
I : NodeSets) {
792 dbgs() <<
" NodeSet ";
797 computeNodeOrder(NodeSets);
800 checkValidNodeOrder(Circuits);
803 Scheduled = schedulePipeline(Schedule);
808 Pass.ORE->emit([&]() {
810 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
811 <<
"Unable to find schedule";
818 if (numStages == 0) {
821 Pass.ORE->emit([&]() {
823 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
824 <<
"No need to pipeline - no overlapped iterations in schedule.";
831 <<
" : too many stages, abort\n");
832 NumFailLargeMaxStage++;
833 Pass.ORE->emit([&]() {
835 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
836 <<
"Too many stages in schedule: "
837 <<
ore::NV(
"numStages", (
int)numStages) <<
" > "
839 <<
". Refer to -pipeliner-max-stages.";
844 Pass.ORE->emit([&]() {
847 <<
"Pipelined succesfully!";
852 std::vector<MachineInstr *> OrderedInsts;
856 OrderedInsts.push_back(SU->getInstr());
857 Cycles[SU->getInstr()] =
Cycle;
862 for (
auto &KV : NewMIs) {
863 Cycles[KV.first] = Cycles[KV.second];
864 Stages[KV.first] = Stages[KV.second];
865 NewInstrChanges[KV.first] = InstrChanges[
getSUnit(KV.first)];
872 "Cannot serialize a schedule with InstrChanges!");
882 LoopPipelinerInfo->isMVEExpanderSupported() &&
896 for (
auto &KV : NewMIs)
897 MF.deleteMachineInstr(KV.second);
908 assert(Phi.isPHI() &&
"Expecting a Phi.");
912 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
913 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
914 InitVal = Phi.getOperand(i).getReg();
916 LoopVal = Phi.getOperand(i).getReg();
918 assert(InitVal && LoopVal &&
"Unexpected Phi structure.");
924 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
925 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
926 return Phi.getOperand(i).getReg();
935 while (!Worklist.
empty()) {
937 for (
const auto &
SI : SU->
Succs) {
940 if (Visited.
count(SuccSU))
953 if (!getUnderlyingObjects())
978bool SUnitWithMemInfo::getUnderlyingObjects() {
980 if (!
MI->hasOneMemOperand())
1002 if (Src.isTriviallyDisjoint(Dst))
1009 if (PerformCheapCheck) {
1016 int64_t Offset1, Offset2;
1017 bool Offset1IsScalable, Offset2IsScalable;
1018 if (
TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable,
1020 TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable,
1023 Offset1IsScalable == Offset2IsScalable &&
1024 (
int)Offset1 < (
int)Offset2) {
1025 assert(
TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) &&
1026 "What happened to the chain edge?");
1038 if (Src.isUnknown() || Dst.isUnknown())
1040 if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset)
1051 for (
const Value *SrcObj : Src.UnderlyingObjs)
1052 for (
const Value *DstObj : Dst.UnderlyingObjs)
1060void LoopCarriedOrderDepsTracker::LoadStoreChunk::append(SUnit *SU) {
1062 if (!
MI->mayLoadOrStore())
1064 (
MI->mayStore() ? Stores : Loads).emplace_back(SU);
1070 : DAG(SSD), BAA(BAA), SUnits(DAG->SUnits), N(SUnits.
size()),
1071 LoopCarried(N,
BitVector(N)), TII(TII), TRI(TRI) {}
1075 for (
auto &SU : SUnits) {
1076 auto Tagged = getInstrTag(&SU);
1081 TaggedSUnits.emplace_back(&SU, *Tagged);
1084 computeDependenciesAux();
1087std::optional<LoopCarriedOrderDepsTracker::InstrTag>
1088LoopCarriedOrderDepsTracker::getInstrTag(
SUnit *SU)
const {
1090 if (
TII->isGlobalMemoryObject(
MI))
1091 return InstrTag::Barrier;
1093 if (
MI->mayStore() ||
1094 (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad()))
1095 return InstrTag::LoadOrStore;
1097 if (
MI->mayRaiseFPException())
1098 return InstrTag::FPExceptions;
1100 return std::nullopt;
1103void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
1104 const SUnitWithMemInfo &Src,
const SUnitWithMemInfo &Dst,
1105 bool PerformCheapCheck) {
1107 if (Src.SU == Dst.SU)
1111 LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
1114void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
1115 const LoadStoreChunk &From,
const LoadStoreChunk &To) {
1117 for (
const SUnitWithMemInfo &Src : From.Loads)
1118 for (
const SUnitWithMemInfo &Dst : To.Stores)
1120 addDependenciesBetweenSUs(Src, Dst, Src.SU->NodeNum < Dst.SU->NodeNum);
1123 for (
const SUnitWithMemInfo &Src : From.Stores)
1124 for (
const SUnitWithMemInfo &Dst : To.Loads)
1125 addDependenciesBetweenSUs(Src, Dst);
1128 for (
const SUnitWithMemInfo &Src : From.Stores)
1129 for (
const SUnitWithMemInfo &Dst : To.Stores)
1130 addDependenciesBetweenSUs(Src, Dst);
1133void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
1135 for (
const auto &TSU : TaggedSUnits) {
1136 InstrTag
Tag = TSU.getTag();
1137 SUnit *SU = TSU.getPointer();
1139 case InstrTag::Barrier:
1140 Chunks.emplace_back();
1142 case InstrTag::LoadOrStore:
1143 Chunks.back().append(SU);
1145 case InstrTag::FPExceptions:
1154 for (
const LoadStoreChunk &Chunk : Chunks)
1155 addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
1167LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() {
1168 LoopCarriedEdges LCE;
1172 LCODTracker.computeDependencies();
1173 for (
unsigned I = 0;
I != SUnits.size();
I++)
1174 for (
const int Succ : LCODTracker.getLoopCarried(
I).set_bits())
1187void SwingSchedulerDAG::updatePhiDependences() {
1189 const TargetSubtargetInfo &
ST = MF.getSubtarget<TargetSubtargetInfo>();
1192 for (SUnit &
I : SUnits) {
1197 MachineInstr *
MI =
I.getInstr();
1199 for (
const MachineOperand &MO :
MI->operands()) {
1206 UI =
MRI.use_instr_begin(
Reg),
1207 UE =
MRI.use_instr_end();
1209 MachineInstr *
UseMI = &*UI;
1210 SUnit *SU = getSUnit(
UseMI);
1220 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
1225 }
else if (MO.isUse()) {
1228 if (
DefMI ==
nullptr)
1230 SUnit *SU = getSUnit(
DefMI);
1235 ST.adjustSchedDependency(SU, 0, &
I, MO.getOperandNo(), Dep,
1242 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
1251 for (
auto &PI :
I.Preds) {
1252 MachineInstr *PMI = PI.getSUnit()->getInstr();
1254 if (
I.getInstr()->isPHI()) {
1263 for (
const SDep &
D : RemoveDeps)
1270void SwingSchedulerDAG::changeDependences() {
1274 for (SUnit &
I : SUnits) {
1275 unsigned BasePos = 0, OffsetPos = 0;
1277 int64_t NewOffset = 0;
1278 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
1283 Register OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
1284 MachineInstr *
DefMI =
MRI.getUniqueVRegDef(OrigBase);
1287 SUnit *DefSU = getSUnit(
DefMI);
1291 MachineInstr *LastMI =
MRI.getUniqueVRegDef(NewBase);
1294 SUnit *LastSU = getSUnit(LastMI);
1298 if (Topo.IsReachable(&
I, LastSU))
1303 for (
const SDep &
P :
I.Preds)
1304 if (
P.getSUnit() == DefSU)
1306 for (
const SDep &
D : Deps) {
1307 Topo.RemovePred(&
I,
D.getSUnit());
1312 for (
auto &
P : LastSU->
Preds)
1315 for (
const SDep &
D : Deps) {
1316 Topo.RemovePred(LastSU,
D.getSUnit());
1323 Topo.AddPred(LastSU, &
I);
1328 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
1339 std::vector<MachineInstr *> &OrderedInsts,
1347 Stage <= LastStage; ++Stage) {
1350 Instrs[
Cycle].push_front(SU);
1357 std::deque<SUnit *> &CycleInstrs = Instrs[
Cycle];
1359 for (
SUnit *SU : CycleInstrs) {
1361 OrderedInsts.push_back(
MI);
1371struct FuncUnitSorter {
1372 const InstrItineraryData *InstrItins;
1373 const MCSubtargetInfo *STI;
1374 DenseMap<InstrStage::FuncUnits, unsigned> Resources;
1376 FuncUnitSorter(
const TargetSubtargetInfo &TSI)
1377 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1382 unsigned minFuncUnits(
const MachineInstr *Inst,
1385 unsigned min = UINT_MAX;
1386 if (InstrItins && !InstrItins->
isEmpty()) {
1387 for (
const InstrStage &IS :
1389 InstrItins->
endStage(SchedClass))) {
1392 if (numAlternatives < min) {
1393 min = numAlternatives;
1400 const MCSchedClassDesc *SCDesc =
1407 for (
const MCWriteProcResEntry &PRE :
1410 if (!PRE.ReleaseAtCycle)
1412 const MCProcResourceDesc *ProcResource =
1414 unsigned NumUnits = ProcResource->
NumUnits;
1415 if (NumUnits < min) {
1417 F = PRE.ProcResourceIdx;
1422 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1430 void calcCriticalResources(MachineInstr &
MI) {
1431 unsigned SchedClass =
MI.getDesc().getSchedClass();
1432 if (InstrItins && !InstrItins->
isEmpty()) {
1433 for (
const InstrStage &IS :
1435 InstrItins->
endStage(SchedClass))) {
1438 Resources[FuncUnits]++;
1443 const MCSchedClassDesc *SCDesc =
1450 for (
const MCWriteProcResEntry &PRE :
1453 if (!PRE.ReleaseAtCycle)
1455 Resources[PRE.ProcResourceIdx]++;
1459 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1463 bool operator()(
const MachineInstr *IS1,
const MachineInstr *IS2)
const {
1465 unsigned MFUs1 = minFuncUnits(IS1, F1);
1466 unsigned MFUs2 = minFuncUnits(IS2, F2);
1469 return MFUs1 > MFUs2;
1474class HighRegisterPressureDetector {
1475 MachineBasicBlock *OrigMBB;
1476 const MachineRegisterInfo &
MRI;
1477 const TargetRegisterInfo *
TRI;
1479 const unsigned PSetNum;
1485 std::vector<unsigned> InitSetPressure;
1489 std::vector<unsigned> PressureSetLimit;
1491 DenseMap<MachineInstr *, RegisterOperands> ROMap;
1493 using Instr2LastUsesTy = DenseMap<MachineInstr *, SmallDenseSet<Register, 4>>;
1496 using OrderedInstsTy = std::vector<MachineInstr *>;
1497 using Instr2StageTy = DenseMap<MachineInstr *, unsigned>;
1500 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1501 if (Pressures.size() == 0) {
1505 for (
unsigned P : Pressures) {
1515 for (
auto PSetIter =
MRI.getPressureSets(
Reg); PSetIter.isValid();
1517 dbgs() << *PSetIter <<
' ';
1522 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1524 auto PSetIter =
MRI.getPressureSets(
Reg);
1525 unsigned Weight = PSetIter.getWeight();
1526 for (; PSetIter.isValid(); ++PSetIter)
1527 Pressure[*PSetIter] += Weight;
1530 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1532 auto PSetIter =
MRI.getPressureSets(
Reg);
1533 unsigned Weight = PSetIter.getWeight();
1534 for (; PSetIter.isValid(); ++PSetIter) {
1535 auto &
P = Pressure[*PSetIter];
1537 "register pressure must be greater than or equal weight");
1559 void computeLiveIn() {
1560 DenseSet<Register>
Used;
1561 for (
auto &
MI : *OrigMBB) {
1562 if (
MI.isDebugInstr())
1564 for (
auto &Use : ROMap[&
MI].
Uses) {
1570 if (isReservedRegister(
Reg))
1572 if (isDefinedInThisLoop(
Reg))
1578 for (
auto LiveIn : Used)
1579 increaseRegisterPressure(InitSetPressure, LiveIn);
1583 void computePressureSetLimit(
const RegisterClassInfo &RCI) {
1584 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1599 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1600 Instr2StageTy &Stages)
const {
1605 DenseSet<Register> TargetRegs;
1606 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1607 if (isDefinedInThisLoop(
Reg))
1610 for (MachineInstr *
MI : OrderedInsts) {
1613 UpdateTargetRegs(
Reg);
1615 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses)
1616 UpdateTargetRegs(
Use.RegUnit);
1620 const auto InstrScore = [&Stages](MachineInstr *
MI) {
1621 return Stages[
MI] +
MI->isPHI();
1624 DenseMap<Register, MachineInstr *> LastUseMI;
1626 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses) {
1632 MachineInstr *Orig = Ite->second;
1633 MachineInstr *
New =
MI;
1634 if (InstrScore(Orig) < InstrScore(New))
1640 Instr2LastUsesTy LastUses;
1641 for (
auto &Entry : LastUseMI)
1658 std::vector<unsigned>
1659 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1660 Instr2StageTy &Stages,
1661 const unsigned StageCount)
const {
1662 using RegSetTy = SmallDenseSet<Register, 16>;
1668 auto CurSetPressure = InitSetPressure;
1669 auto MaxSetPressure = InitSetPressure;
1670 auto LastUses = computeLastUses(OrderedInsts, Stages);
1673 dbgs() <<
"Ordered instructions:\n";
1674 for (MachineInstr *
MI : OrderedInsts) {
1675 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1680 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1690 increaseRegisterPressure(CurSetPressure,
Reg);
1694 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1700 if (!RegSet.contains(
Reg))
1705 decreaseRegisterPressure(CurSetPressure,
Reg);
1709 for (
unsigned I = 0;
I < StageCount;
I++) {
1710 for (MachineInstr *
MI : OrderedInsts) {
1711 const auto Stage = Stages[
MI];
1715 const unsigned Iter =
I - Stage;
1717 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1718 InsertReg(LiveRegSets[Iter],
Def.RegUnit);
1720 for (
auto LastUse : LastUses[
MI]) {
1723 EraseReg(LiveRegSets[Iter - 1], LastUse);
1725 EraseReg(LiveRegSets[Iter], LastUse);
1729 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1730 MaxSetPressure[PSet] =
1731 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1734 dbgs() <<
"CurSetPressure=";
1735 dumpRegisterPressures(CurSetPressure);
1736 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1742 return MaxSetPressure;
1746 HighRegisterPressureDetector(MachineBasicBlock *OrigMBB,
1747 const MachineFunction &MF)
1748 : OrigMBB(OrigMBB),
MRI(MF.getRegInfo()),
1749 TRI(MF.getSubtarget().getRegisterInfo()),
1750 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1751 PressureSetLimit(PSetNum, 0) {}
1755 void init(
const RegisterClassInfo &RCI) {
1756 for (MachineInstr &
MI : *OrigMBB) {
1757 if (
MI.isDebugInstr())
1759 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1763 computePressureSetLimit(RCI);
1768 bool detect(
const SwingSchedulerDAG *SSD, SMSchedule &Schedule,
1769 const unsigned MaxStage)
const {
1771 "the percentage of the margin must be between 0 to 100");
1773 OrderedInstsTy OrderedInsts;
1774 Instr2StageTy Stages;
1776 const auto MaxSetPressure =
1777 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1780 dbgs() <<
"Dump MaxSetPressure:\n";
1781 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1782 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1787 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1788 unsigned Limit = PressureSetLimit[PSet];
1791 <<
" Margin=" << Margin <<
"\n");
1792 if (Limit < MaxSetPressure[PSet] + Margin) {
1795 <<
"Rejected the schedule because of too high register pressure\n");
1811unsigned SwingSchedulerDAG::calculateResMII() {
1813 ResourceManager
RM(&MF.getSubtarget(),
this);
1814 return RM.calculateResMII();
1823unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1824 unsigned RecMII = 0;
1826 for (NodeSet &Nodes : NodeSets) {
1830 unsigned Delay = Nodes.getLatency();
1831 unsigned Distance = 1;
1834 unsigned CurMII = (Delay + Distance - 1) / Distance;
1835 Nodes.setRecMII(CurMII);
1836 if (CurMII > RecMII)
1844void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1845 SwingSchedulerDAG *DAG) {
1846 BitVector
Added(SUnits.size());
1847 DenseMap<int, int> OutputDeps;
1848 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1851 for (
auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
1854 if (OE.isOutputDep()) {
1855 int N = OE.getDst()->NodeNum;
1857 auto Dep = OutputDeps.
find(BackEdge);
1858 if (Dep != OutputDeps.
end()) {
1859 BackEdge = Dep->second;
1860 OutputDeps.
erase(Dep);
1862 OutputDeps[
N] = BackEdge;
1865 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
1877 int N = OE.getDst()->NodeNum;
1879 AdjK[i].push_back(
N);
1885 for (
auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
1886 SUnit *Src =
IE.getSrc();
1887 SUnit *Dst =
IE.getDst();
1890 if (
IE.isOrderDep() && Src->getInstr()->mayLoad()) {
1891 int N = Src->NodeNum;
1893 AdjK[i].push_back(
N);
1900 for (
auto &OD : OutputDeps)
1901 if (!
Added.test(OD.second)) {
1902 AdjK[OD.first].push_back(OD.second);
1903 Added.set(OD.second);
1909bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
1910 const SwingSchedulerDAG *DAG,
1912 SUnit *SV = &SUnits[
V];
1917 for (
auto W : AdjK[V]) {
1918 if (NumPaths > MaxPaths)
1929 if (!Blocked.test(W)) {
1930 if (circuit(W, S, NodeSets, DAG,
1931 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
1939 for (
auto W : AdjK[V]) {
1950void SwingSchedulerDAG::Circuits::unblock(
int U) {
1952 SmallPtrSet<SUnit *, 4> &BU =
B[
U];
1953 while (!BU.
empty()) {
1954 SmallPtrSet<SUnit *, 4>::iterator
SI = BU.
begin();
1955 assert(SI != BU.
end() &&
"Invalid B set.");
1958 if (Blocked.test(
W->NodeNum))
1959 unblock(
W->NodeNum);
1965void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1966 Circuits Cir(SUnits, Topo);
1968 Cir.createAdjacencyStructure(
this);
1969 for (
int I = 0,
E = SUnits.size();
I !=
E; ++
I) {
1971 Cir.circuit(
I,
I, NodeSets,
this);
1993void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
1994 for (SUnit &SU : DAG->
SUnits) {
2004 for (
auto &Dep : SU.
Preds) {
2005 SUnit *TmpSU = Dep.getSUnit();
2006 MachineInstr *TmpMI = TmpSU->
getInstr();
2017 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
2025 for (
auto &Dep : PHISUs[Index]->Succs) {
2029 SUnit *TmpSU = Dep.getSUnit();
2030 MachineInstr *TmpMI = TmpSU->
getInstr();
2039 if (UseSUs.
size() == 0)
2044 for (
auto *
I : UseSUs) {
2045 for (
auto *Src : SrcSUs) {
2061void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
2062 ScheduleInfo.resize(SUnits.size());
2065 for (
int I : Topo) {
2066 const SUnit &SU = SUnits[
I];
2073 for (
int I : Topo) {
2075 int zeroLatencyDepth = 0;
2076 SUnit *SU = &SUnits[
I];
2077 for (
const auto &IE : DDG->getInEdges(SU)) {
2078 SUnit *Pred =
IE.getSrc();
2079 if (
IE.getLatency() == 0)
2081 std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1);
2082 if (
IE.ignoreDependence(
true))
2084 asap = std::max(asap, (
int)(getASAP(Pred) +
IE.getLatency() -
2085 IE.getDistance() * MII));
2087 maxASAP = std::max(maxASAP, asap);
2088 ScheduleInfo[
I].ASAP = asap;
2089 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
2095 int zeroLatencyHeight = 0;
2096 SUnit *SU = &SUnits[
I];
2097 for (
const auto &OE : DDG->getOutEdges(SU)) {
2098 SUnit *Succ = OE.getDst();
2101 if (OE.getLatency() == 0)
2103 std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1);
2104 if (OE.ignoreDependence(
true))
2106 alap = std::min(alap, (
int)(getALAP(Succ) - OE.getLatency() +
2107 OE.getDistance() * MII));
2110 ScheduleInfo[
I].ALAP = alap;
2111 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
2115 for (NodeSet &
I : NodeSets)
2116 I.computeNodeSetInfo(
this);
2119 for (
unsigned i = 0; i < SUnits.size(); i++) {
2120 dbgs() <<
"\tNode " << i <<
":\n";
2121 dbgs() <<
"\t ASAP = " << getASAP(&SUnits[i]) <<
"\n";
2122 dbgs() <<
"\t ALAP = " << getALAP(&SUnits[i]) <<
"\n";
2123 dbgs() <<
"\t MOV = " << getMOV(&SUnits[i]) <<
"\n";
2124 dbgs() <<
"\t D = " << getDepth(&SUnits[i]) <<
"\n";
2125 dbgs() <<
"\t H = " << getHeight(&SUnits[i]) <<
"\n";
2126 dbgs() <<
"\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) <<
"\n";
2127 dbgs() <<
"\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) <<
"\n";
2142 SUnit *PredSU = IE.getSrc();
2143 if (S && S->count(PredSU) == 0)
2145 if (IE.ignoreDependence(
true))
2156 SUnit *SuccSU = OE.getDst();
2157 if (!OE.isAntiDep())
2159 if (S && S->count(SuccSU) == 0)
2165 return !Preds.
empty();
2178 SUnit *SuccSU = OE.getDst();
2179 if (S && S->count(SuccSU) == 0)
2181 if (OE.ignoreDependence(
false))
2192 SUnit *PredSU = IE.getSrc();
2193 if (!IE.isAntiDep())
2195 if (S && S->count(PredSU) == 0)
2201 return !Succs.
empty();
2217 if (!Visited.
insert(Cur).second)
2218 return Path.contains(Cur);
2219 bool FoundPath =
false;
2221 if (!OE.ignoreDependence(
false))
2223 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
2225 if (IE.isAntiDep() && IE.getDistance() == 0)
2227 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
2242 for (
SUnit *SU : NS) {
2248 if (
Reg.isVirtual())
2250 else if (
MRI.isAllocatable(
Reg))
2251 Uses.insert_range(
TRI->regunits(
Reg.asMCReg()));
2254 for (
SUnit *SU : NS)
2258 if (
Reg.isVirtual()) {
2261 }
else if (
MRI.isAllocatable(
Reg)) {
2263 if (!
Uses.count(Unit))
2272void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2273 for (
auto &NS : NodeSets) {
2277 IntervalPressure RecRegPressure;
2278 RegPressureTracker RecRPTracker(RecRegPressure);
2279 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(),
false,
true);
2281 RecRPTracker.closeBottom();
2283 std::vector<SUnit *> SUnits(NS.begin(), NS.end());
2284 llvm::sort(SUnits, [](
const SUnit *
A,
const SUnit *
B) {
2285 return A->NodeNum >
B->NodeNum;
2288 for (
auto &SU : SUnits) {
2294 RecRPTracker.setPos(std::next(CurInstI));
2296 RegPressureDelta RPDelta;
2298 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2303 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2306 NS.setExceedPressure(SU);
2309 RecRPTracker.recede();
2316void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2317 unsigned Colocate = 0;
2318 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2320 SmallSetVector<SUnit *, 8>
S1;
2323 for (
int j = i + 1;
j <
e; ++
j) {
2327 SmallSetVector<SUnit *, 8> S2;
2344void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2349 for (
auto &NS : NodeSets) {
2350 if (NS.getRecMII() > 2)
2352 if (NS.getMaxDepth() > MII)
2361void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2362 SetVector<SUnit *> NodesAdded;
2363 SmallPtrSet<SUnit *, 8> Visited;
2366 for (NodeSet &
I : NodeSets) {
2367 SmallSetVector<SUnit *, 8>
N;
2370 SetVector<SUnit *>
Path;
2371 for (SUnit *NI :
N) {
2373 computePath(NI, Path, NodesAdded,
I, Visited, DDG.get());
2380 if (
succ_L(NodesAdded,
N, DDG.get())) {
2381 SetVector<SUnit *>
Path;
2382 for (SUnit *NI :
N) {
2384 computePath(NI, Path,
I, NodesAdded, Visited, DDG.get());
2395 SmallSetVector<SUnit *, 8>
N;
2396 if (
succ_L(NodesAdded,
N, DDG.get()))
2398 addConnectedNodes(
I, NewSet, NodesAdded);
2399 if (!NewSet.
empty())
2400 NodeSets.push_back(NewSet);
2405 if (
pred_L(NodesAdded,
N, DDG.get()))
2407 addConnectedNodes(
I, NewSet, NodesAdded);
2408 if (!NewSet.
empty())
2409 NodeSets.push_back(NewSet);
2413 for (SUnit &SU : SUnits) {
2414 if (NodesAdded.
count(&SU) == 0) {
2416 addConnectedNodes(&SU, NewSet, NodesAdded);
2417 if (!NewSet.
empty())
2418 NodeSets.push_back(NewSet);
2424void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2425 SetVector<SUnit *> &NodesAdded) {
2428 for (
auto &OE : DDG->getOutEdges(SU)) {
2430 if (!OE.isArtificial() && !
Successor->isBoundaryNode() &&
2432 addConnectedNodes(
Successor, NewSet, NodesAdded);
2434 for (
auto &IE : DDG->getInEdges(SU)) {
2435 SUnit *Predecessor =
IE.getSrc();
2436 if (!
IE.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2437 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2446 for (
SUnit *SU : Set1) {
2447 if (Set2.
count(SU) != 0)
2450 return !Result.empty();
2454void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2455 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2458 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2463 for (SUnit *SU : *J)
2475void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2476 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2478 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2479 J->remove_if([&](SUnit *SUJ) {
return I->count(SUJ); });
2494void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2495 SmallSetVector<SUnit *, 8>
R;
2498 for (
auto &Nodes : NodeSets) {
2501 SmallSetVector<SUnit *, 8>
N;
2516 }
else if (NodeSets.size() == 1) {
2517 for (
const auto &
N : Nodes)
2518 if (
N->Succs.size() == 0)
2524 SUnit *maxASAP =
nullptr;
2525 for (SUnit *SU : Nodes) {
2526 if (maxASAP ==
nullptr || getASAP(SU) > getASAP(maxASAP) ||
2527 (getASAP(SU) == getASAP(maxASAP) && SU->
NodeNum > maxASAP->
NodeNum))
2535 while (!
R.empty()) {
2536 if (Order == TopDown) {
2540 while (!
R.empty()) {
2541 SUnit *maxHeight =
nullptr;
2542 for (SUnit *
I : R) {
2543 if (maxHeight ==
nullptr || getHeight(
I) > getHeight(maxHeight))
2545 else if (getHeight(
I) == getHeight(maxHeight) &&
2546 getZeroLatencyHeight(
I) > getZeroLatencyHeight(maxHeight))
2548 else if (getHeight(
I) == getHeight(maxHeight) &&
2549 getZeroLatencyHeight(
I) ==
2550 getZeroLatencyHeight(maxHeight) &&
2551 getMOV(
I) < getMOV(maxHeight))
2556 R.remove(maxHeight);
2557 for (
const auto &OE : DDG->getOutEdges(maxHeight)) {
2558 SUnit *SU = OE.getDst();
2559 if (Nodes.count(SU) == 0)
2563 if (OE.ignoreDependence(
false))
2572 for (
const auto &IE : DDG->getInEdges(maxHeight)) {
2573 SUnit *SU =
IE.getSrc();
2574 if (!
IE.isAntiDep())
2576 if (Nodes.count(SU) == 0)
2585 SmallSetVector<SUnit *, 8>
N;
2592 while (!
R.empty()) {
2593 SUnit *maxDepth =
nullptr;
2594 for (SUnit *
I : R) {
2595 if (maxDepth ==
nullptr || getDepth(
I) > getDepth(maxDepth))
2597 else if (getDepth(
I) == getDepth(maxDepth) &&
2598 getZeroLatencyDepth(
I) > getZeroLatencyDepth(maxDepth))
2600 else if (getDepth(
I) == getDepth(maxDepth) &&
2601 getZeroLatencyDepth(
I) == getZeroLatencyDepth(maxDepth) &&
2602 getMOV(
I) < getMOV(maxDepth))
2608 if (Nodes.isExceedSU(maxDepth)) {
2611 R.insert(Nodes.getNode(0));
2614 for (
const auto &IE : DDG->getInEdges(maxDepth)) {
2615 SUnit *SU =
IE.getSrc();
2616 if (Nodes.count(SU) == 0)
2627 for (
const auto &OE : DDG->getOutEdges(maxDepth)) {
2628 SUnit *SU = OE.getDst();
2629 if (!OE.isAntiDep())
2631 if (Nodes.count(SU) == 0)
2640 SmallSetVector<SUnit *, 8>
N;
2649 dbgs() <<
"Node order: ";
2651 dbgs() <<
" " <<
I->NodeNum <<
" ";
2658bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2665 bool scheduleFound =
false;
2666 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2669 std::make_unique<HighRegisterPressureDetector>(Loop.getHeader(), MF);
2670 HRPDetector->init(RegClassInfo);
2673 for (
unsigned II = MII;
II <= MAX_II && !scheduleFound; ++
II) {
2685 int EarlyStart = INT_MIN;
2686 int LateStart = INT_MAX;
2695 dbgs() <<
format(
"\tes: %8x ls: %8x\n", EarlyStart, LateStart));
2697 if (EarlyStart > LateStart)
2698 scheduleFound =
false;
2699 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
2701 Schedule.
insert(SU, EarlyStart, EarlyStart + (
int)
II - 1,
II);
2702 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
2704 Schedule.
insert(SU, LateStart, LateStart - (
int)
II + 1,
II);
2705 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2706 LateStart = std::min(LateStart, EarlyStart + (
int)
II - 1);
2715 scheduleFound = Schedule.
insert(SU, LateStart, EarlyStart,
II);
2717 scheduleFound = Schedule.
insert(SU, EarlyStart, LateStart,
II);
2720 scheduleFound = Schedule.
insert(SU, FirstCycle + getASAP(SU),
2721 FirstCycle + getASAP(SU) +
II - 1,
II);
2729 scheduleFound =
false;
2733 dbgs() <<
"\tCan't schedule\n";
2735 }
while (++NI != NE && scheduleFound);
2740 scheduleFound = DDG->isValidSchedule(Schedule);
2762 if (scheduleFound) {
2763 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*
this, Schedule);
2768 if (scheduleFound) {
2770 Pass.ORE->emit([&]() {
2771 return MachineOptimizationRemarkAnalysis(
2772 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
2773 <<
"Schedule found with Initiation Interval: "
2775 <<
", MaxStageCount: "
2789 if (!
Reg.isVirtual())
2791 if (
MRI.getVRegDef(
Reg)->getParent() !=
MI.getParent())
2803 if (!
Op.isReg() || !
Op.getReg().isVirtual())
2831 if (Def->getParent() != LoopBB)
2834 if (Def->isCopy()) {
2836 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
2838 CurReg = Def->getOperand(1).getReg();
2839 }
else if (Def->isPHI()) {
2845 }
else if (
TII->getIncrementValue(*Def,
Value)) {
2853 bool OffsetIsScalable;
2854 if (
TII->getMemOperandWithOffset(*Def, BaseOp,
Offset, OffsetIsScalable,
2857 CurReg = BaseOp->
getReg();
2869 if (CurReg == OrgReg)
2873 if (!Phi || !Increment)
2881bool SwingSchedulerDAG::computeDelta(
const MachineInstr &
MI,
int &Delta)
const {
2882 const TargetRegisterInfo *
TRI = MF.getSubtarget().getRegisterInfo();
2883 const MachineOperand *BaseOp;
2885 bool OffsetIsScalable;
2886 if (!
TII->getMemOperandWithOffset(
MI, BaseOp,
Offset, OffsetIsScalable,
TRI))
2890 if (OffsetIsScalable)
2893 if (!BaseOp->
isReg())
2906bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *
MI,
2908 unsigned &OffsetPos,
2914 unsigned BasePosLd, OffsetPosLd;
2920 MachineRegisterInfo &
MRI =
MI->getMF()->getRegInfo();
2921 MachineInstr *
Phi =
MRI.getVRegDef(BaseReg);
2922 if (!Phi || !
Phi->isPHI())
2930 MachineInstr *PrevDef =
MRI.getVRegDef(PrevReg);
2931 if (!PrevDef || PrevDef ==
MI)
2937 unsigned BasePos1 = 0, OffsetPos1 = 0;
2943 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
2945 MachineInstr *NewMI = MF.CloneMachineInstr(
MI);
2948 MF.deleteMachineInstr(NewMI);
2953 BasePos = BasePosLd;
2954 OffsetPos = OffsetPosLd;
2966 InstrChanges.find(SU);
2967 if (It != InstrChanges.
end()) {
2968 std::pair<Register, int64_t> RegAndOffset = It->second;
2969 unsigned BasePos, OffsetPos;
2970 if (!
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
2972 Register BaseReg =
MI->getOperand(BasePos).getReg();
2978 if (BaseStageNum < DefStageNum) {
2980 int OffsetDiff = DefStageNum - BaseStageNum;
2981 if (DefCycleNum < BaseCycleNum) {
2987 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3002 while (Def->isPHI()) {
3003 if (!Visited.
insert(Def).second)
3005 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3006 if (Def->getOperand(i + 1).getMBB() == BB) {
3007 Def =
MRI.getVRegDef(Def->getOperand(i).getReg());
3018 int DeltaB, DeltaO, Delta;
3025 int64_t OffsetB, OffsetO;
3026 bool OffsetBIsScalable, OffsetOIsScalable;
3028 if (!
TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,
3029 OffsetBIsScalable,
TRI) ||
3030 !
TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,
3031 OffsetOIsScalable,
TRI))
3034 if (OffsetBIsScalable || OffsetOIsScalable)
3044 if (!RegB.
isVirtual() || !RegO.isVirtual())
3049 if (!DefB || !DefO || !DefB->
isPHI() || !DefO->
isPHI())
3074 dbgs() <<
"Overlap check:\n";
3075 dbgs() <<
" BaseMI: ";
3077 dbgs() <<
" Base + " << OffsetB <<
" + I * " << Delta
3078 <<
", Len: " << AccessSizeB.
getValue() <<
"\n";
3079 dbgs() <<
" OtherMI: ";
3081 dbgs() <<
" Base + " << OffsetO <<
" + I * " << Delta
3082 <<
", Len: " << AccessSizeO.
getValue() <<
"\n";
3090 int64_t BaseMinAddr = OffsetB;
3091 int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.
getValue() - 1;
3092 if (BaseMinAddr > OhterNextIterMaxAddr) {
3097 int64_t BaseMaxAddr = OffsetB + AccessSizeB.
getValue() - 1;
3098 int64_t OtherNextIterMinAddr = OffsetO + Delta;
3099 if (BaseMaxAddr < OtherNextIterMinAddr) {
3113 if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() ||
3114 Edge.getDst()->isBoundaryNode())
3120 if (Edge.isOutputDep())
3125 assert(
SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
3142void SwingSchedulerDAG::postProcessDAG() {
3143 for (
auto &M : Mutations)
3153 bool forward =
true;
3155 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
3156 << EndCycle <<
" II: " <<
II <<
"\n";
3158 if (StartCycle > EndCycle)
3162 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3163 for (
int curCycle = StartCycle; curCycle != termCycle;
3164 forward ? ++curCycle : --curCycle) {
3167 ProcItinResources.canReserveResources(*SU, curCycle)) {
3169 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
3174 ProcItinResources.reserveResources(*SU, curCycle);
3175 ScheduledInstrs[curCycle].push_back(SU);
3176 InstrToCycle.insert(std::make_pair(SU, curCycle));
3177 if (curCycle > LastCycle)
3178 LastCycle = curCycle;
3179 if (curCycle < FirstCycle)
3180 FirstCycle = curCycle;
3184 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
3197 int EarlyCycle = INT_MAX;
3198 while (!Worklist.
empty()) {
3201 if (Visited.
count(PrevSU))
3203 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3204 if (it == InstrToCycle.end())
3206 EarlyCycle = std::min(EarlyCycle, it->second);
3207 for (
const auto &IE : DDG->
getInEdges(PrevSU))
3208 if (IE.isOrderDep() || IE.isOutputDep())
3221 int LateCycle = INT_MIN;
3222 while (!Worklist.
empty()) {
3227 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3228 if (it == InstrToCycle.end())
3230 LateCycle = std::max(LateCycle, it->second);
3232 if (OE.isOrderDep() || OE.isOutputDep())
3243 for (
auto &
P : SU->
Preds)
3244 if (
P.getKind() ==
SDep::Anti &&
P.getSUnit()->getInstr()->isPHI())
3245 for (
auto &S :
P.getSUnit()->Succs)
3246 if (S.getKind() ==
SDep::Data && S.getSUnit()->getInstr()->isPHI())
3247 return P.getSUnit();
3260 for (
int cycle =
getFirstCycle(); cycle <= LastCycle; ++cycle) {
3263 if (IE.getSrc() ==
I) {
3268 *MinLateStart = std::min(*MinLateStart, End);
3270 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() *
II;
3271 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3276 if (OE.getDst() ==
I) {
3281 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
3283 int LateStart = cycle - OE.getLatency() + OE.getDistance() *
II;
3284 *MinLateStart = std::min(*MinLateStart, LateStart);
3289 for (
const auto &Dep : SU->
Preds) {
3292 if (BE && Dep.getSUnit() == BE && !SU->
getInstr()->
isPHI() &&
3294 *MinLateStart = std::min(*MinLateStart, cycle);
3304 std::deque<SUnit *> &Insts)
const {
3306 bool OrderBeforeUse =
false;
3307 bool OrderAfterDef =
false;
3308 bool OrderBeforeDef =
false;
3309 unsigned MoveDef = 0;
3310 unsigned MoveUse = 0;
3315 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
3318 if (!MO.isReg() || !MO.getReg().isVirtual())
3322 unsigned BasePos, OffsetPos;
3323 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
3324 if (
MI->getOperand(BasePos).getReg() == Reg)
3328 std::tie(Reads, Writes) =
3329 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3331 OrderBeforeUse =
true;
3336 OrderAfterDef =
true;
3338 }
else if (MO.isUse() && Writes &&
stageScheduled(*
I) == StageInst1) {
3340 OrderBeforeUse =
true;
3344 OrderAfterDef =
true;
3348 OrderBeforeUse =
true;
3352 OrderAfterDef =
true;
3357 OrderBeforeUse =
true;
3363 OrderBeforeDef =
true;
3371 if (OE.getDst() != *
I)
3374 OrderBeforeUse =
true;
3381 else if ((OE.isAntiDep() || OE.isOutputDep()) &&
3383 OrderBeforeUse =
true;
3384 if ((MoveUse == 0) || (Pos < MoveUse))
3389 if (IE.getSrc() != *
I)
3391 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
3393 OrderAfterDef =
true;
3400 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3401 OrderBeforeUse =
false;
3406 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3410 if (OrderBeforeUse && OrderAfterDef) {
3411 SUnit *UseSU = Insts.at(MoveUse);
3412 SUnit *DefSU = Insts.at(MoveDef);
3413 if (MoveUse > MoveDef) {
3414 Insts.erase(Insts.begin() + MoveUse);
3415 Insts.erase(Insts.begin() + MoveDef);
3417 Insts.erase(Insts.begin() + MoveDef);
3418 Insts.erase(Insts.begin() + MoveUse);
3428 Insts.push_front(SU);
3430 Insts.push_back(SU);
3438 assert(Phi.isPHI() &&
"Expecting a Phi.");
3445 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3453 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3471 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3477 if (DMO.getReg() == LoopReg)
3488 if (InstrToCycle.count(IE.getSrc()))
3499 for (
auto &SU : SSD->
SUnits)
3504 while (!Worklist.
empty()) {
3506 if (DoNotPipeline.
count(SU))
3509 DoNotPipeline.
insert(SU);
3516 if (OE.getDistance() == 1)
3519 return DoNotPipeline;
3528 int NewLastCycle = INT_MIN;
3533 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3540 if (IE.getDistance() == 0)
3541 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
3546 if (OE.getDistance() == 1)
3547 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);
3549 int OldCycle = InstrToCycle[&SU];
3550 if (OldCycle != NewCycle) {
3551 InstrToCycle[&SU] = NewCycle;
3556 <<
") is not pipelined; moving from cycle " << OldCycle
3557 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3582 if (FirstCycle + InitiationInterval <= NewCycle)
3585 NewLastCycle = std::max(NewLastCycle, NewCycle);
3587 LastCycle = NewLastCycle;
3604 int CycleDef = InstrToCycle[&SU];
3605 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3607 SUnit *Dst = OE.getDst();
3608 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
3609 if (OE.getReg().isPhysical()) {
3612 if (InstrToCycle[Dst] <= CycleDef)
3630void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3633 typedef std::pair<SUnit *, unsigned> UnitIndex;
3634 std::vector<UnitIndex> Indices(
NodeOrder.size(), std::make_pair(
nullptr, 0));
3636 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i)
3637 Indices.push_back(std::make_pair(
NodeOrder[i], i));
3639 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3640 return std::get<0>(i1) < std::get<0>(i2);
3653 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i) {
3657 bool PredBefore =
false;
3658 bool SuccBefore =
false;
3665 for (
const auto &IE : DDG->getInEdges(SU)) {
3666 SUnit *PredSU = IE.getSrc();
3667 unsigned PredIndex = std::get<1>(
3676 for (
const auto &OE : DDG->getOutEdges(SU)) {
3677 SUnit *SuccSU = OE.getDst();
3683 unsigned SuccIndex = std::get<1>(
3696 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3701 NumNodeOrderIssues++;
3705 <<
" are scheduled before node " << SU->
NodeNum
3712 dbgs() <<
"Invalid node order found!\n";
3725 for (
SUnit *SU : Instrs) {
3727 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3735 InstrChanges.find(SU);
3736 if (It != InstrChanges.
end()) {
3737 unsigned BasePos, OffsetPos;
3739 if (
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos)) {
3743 MI->getOperand(OffsetPos).getImm() - It->second.second;
3756 unsigned TiedUseIdx = 0;
3757 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3759 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3761 NewBaseReg =
MI->getOperand(i).getReg();
3770 const std::deque<SUnit *> &Instrs)
const {
3771 std::deque<SUnit *> NewOrderPhi;
3772 for (
SUnit *SU : Instrs) {
3774 NewOrderPhi.push_back(SU);
3776 std::deque<SUnit *> NewOrderI;
3777 for (
SUnit *SU : Instrs) {
3793 std::deque<SUnit *> &cycleInstrs =
3794 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3796 ScheduledInstrs[cycle].push_front(SU);
3802 for (
int cycle =
getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3803 ScheduledInstrs.erase(cycle);
3813 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3822 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3823 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3824 for (
const auto &
I : Nodes)
3825 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3829#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3836 for (
SUnit *CI : cycleInstrs->second) {
3838 os <<
"(" << CI->
NodeNum <<
") ";
3849void ResourceManager::dumpMRT()
const {
3853 std::stringstream SS;
3855 SS << std::setw(4) <<
"Slot";
3856 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3857 SS << std::setw(3) <<
I;
3858 SS << std::setw(7) <<
"#Mops"
3860 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3861 SS << std::setw(4) << Slot;
3862 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3863 SS << std::setw(3) << MRT[Slot][
I];
3864 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3873 unsigned ProcResourceID = 0;
3877 assert(SM.getNumProcResourceKinds() < 64 &&
3878 "Too many kinds of resources, unsupported");
3881 Masks.
resize(SM.getNumProcResourceKinds());
3882 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3884 if (
Desc.SubUnitsIdxBegin)
3886 Masks[
I] = 1ULL << ProcResourceID;
3890 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3892 if (!
Desc.SubUnitsIdxBegin)
3894 Masks[
I] = 1ULL << ProcResourceID;
3895 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3896 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3901 dbgs() <<
"ProcResourceDesc:\n";
3902 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3904 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3905 ProcResource->
Name,
I, Masks[
I],
3908 dbgs() <<
" -----------------\n";
3916 dbgs() <<
"canReserveResources:\n";
3919 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3925 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3931 reserveResources(SCDesc,
Cycle);
3932 bool Result = !isOverbooked();
3933 unreserveResources(SCDesc,
Cycle);
3942 dbgs() <<
"reserveResources:\n";
3945 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3951 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3957 reserveResources(SCDesc,
Cycle);
3962 dbgs() <<
"reserveResources: done!\n\n";
3973 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3976 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3985 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3988 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3991bool ResourceManager::isOverbooked()
const {
3993 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
3994 for (
unsigned I = 1,
E = SM.getNumProcResourceKinds();
I <
E; ++
I) {
3995 const MCProcResourceDesc *
Desc = SM.getProcResource(
I);
3996 if (MRT[Slot][
I] >
Desc->NumUnits)
3999 if (NumScheduledMops[Slot] > IssueWidth)
4005int ResourceManager::calculateResMIIDFA()
const {
4010 FuncUnitSorter FUS = FuncUnitSorter(*ST);
4011 for (SUnit &SU : DAG->
SUnits)
4012 FUS.calcCriticalResources(*SU.
getInstr());
4013 PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
4016 for (SUnit &SU : DAG->
SUnits)
4023 while (!FuncUnitOrder.empty()) {
4024 MachineInstr *
MI = FuncUnitOrder.top();
4025 FuncUnitOrder.pop();
4026 if (
TII->isZeroCost(
MI->getOpcode()))
4032 unsigned ReservedCycles = 0;
4033 auto *RI = Resources.
begin();
4034 auto *RE = Resources.
end();
4036 dbgs() <<
"Trying to reserve resource for " << NumCycles
4037 <<
" cycles for \n";
4040 for (
unsigned C = 0;
C < NumCycles; ++
C)
4042 if ((*RI)->canReserveResources(*
MI)) {
4043 (*RI)->reserveResources(*
MI);
4050 <<
", NumCycles:" << NumCycles <<
"\n");
4052 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
4054 <<
"NewResource created to reserve resources"
4057 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
4058 NewResource->reserveResources(*
MI);
4059 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
4063 int Resmii = Resources.
size();
4070 return calculateResMIIDFA();
4077 for (
SUnit &SU : DAG->SUnits) {
4089 <<
" WriteProcRes: ";
4094 make_range(STI->getWriteProcResBegin(SCDesc),
4095 STI->getWriteProcResEnd(SCDesc))) {
4099 SM.getProcResource(PRE.ProcResourceIdx);
4100 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
4103 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
4108 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
4111 dbgs() <<
"#Mops: " << NumMops <<
", "
4112 <<
"IssueWidth: " << IssueWidth <<
", "
4113 <<
"Cycles: " << Result <<
"\n";
4118 std::stringstream SS;
4119 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
4120 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
4125 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
4127 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
4130 std::stringstream SS;
4131 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
4132 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
4133 << std::setw(10) << Cycles <<
"\n";
4137 if (Cycles > Result)
4144 InitiationInterval =
II;
4145 DFAResources.clear();
4146 DFAResources.resize(
II);
4147 for (
auto &
I : DFAResources)
4148 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
4151 NumScheduledMops.clear();
4152 NumScheduledMops.resize(
II);
4156 if (Pred.isArtificial() || Dst->isBoundaryNode())
4161 return IgnoreAnti && (Pred.getKind() ==
SDep::Kind::Anti || Distance != 0);
4164SwingSchedulerDDG::SwingSchedulerDDGEdges &
4165SwingSchedulerDDG::getEdges(
const SUnit *SU) {
4167 return EntrySUEdges;
4173const SwingSchedulerDDG::SwingSchedulerDDGEdges &
4174SwingSchedulerDDG::getEdges(
const SUnit *SU)
const {
4176 return EntrySUEdges;
4182void SwingSchedulerDDG::addEdge(
const SUnit *SU,
4183 const SwingSchedulerDDGEdge &
Edge) {
4185 "Validation-only edges are not expected here.");
4186 auto &Edges = getEdges(SU);
4187 if (
Edge.getSrc() == SU)
4188 Edges.Succs.push_back(
Edge);
4190 Edges.Preds.push_back(
Edge);
4193void SwingSchedulerDDG::initEdges(SUnit *SU) {
4194 for (
const auto &PI : SU->
Preds) {
4195 SwingSchedulerDDGEdge
Edge(SU, PI,
false,
4200 for (
const auto &SI : SU->
Succs) {
4201 SwingSchedulerDDGEdge
Edge(SU, SI,
true,
4209 : EntrySU(EntrySU), ExitSU(ExitSU) {
4210 EdgesVec.resize(SUnits.size());
4215 for (
auto &SU : SUnits)
4219 for (
SUnit &SU : SUnits) {
4224 for (
SUnit *Dst : *OD) {
4227 Edge.setDistance(1);
4228 ValidationOnlyEdges.push_back(Edge);
4234const SwingSchedulerDDG::EdgesType &
4236 return getEdges(SU).Preds;
4239const SwingSchedulerDDG::EdgesType &
4241 return getEdges(SU).Succs;
4248 auto ExpandCycle = [&](
SUnit *SU) {
4255 SUnit *Src = Edge.getSrc();
4256 SUnit *Dst = Edge.getDst();
4257 if (!Src->isInstr() || !Dst->isInstr())
4259 int CycleSrc = ExpandCycle(Src);
4260 int CycleDst = ExpandCycle(Dst);
4261 int MaxLateStart = CycleDst + Edge.getDistance() *
II - Edge.getLatency();
4262 if (CycleSrc > MaxLateStart) {
4264 dbgs() <<
"Validation failed for edge from " << Src->NodeNum <<
" to "
4265 << Dst->NodeNum <<
"\n";
4275 for (
SUnit &SU : SUnits) {
4304 !
TII->isGlobalMemoryObject(FromMI) &&
4322 const auto DumpSU = [](
const SUnit *SU) {
4323 std::ostringstream OSS;
4324 OSS <<
"SU(" << SU->
NodeNum <<
")";
4328 dbgs() <<
" Loop carried edges from " << DumpSU(SU) <<
"\n"
4330 for (
SUnit *Dst : *Order)
4331 dbgs() <<
" " << DumpSU(Dst) <<
"\n";
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::optional< unsigned > getTag(const TargetRegisterInfo *TRI, const MachineInstr &MI, const LoadInfo &LI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
print mir2vec MIR2Vec Vocabulary Printer Pass
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static bool findLoopIncrementValue(const MachineOperand &Op, int &Value)
When Op is a value that is incremented recursively in a loop and there is a unique instruction that i...
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static cl::opt< unsigned > SwpMaxNumStores("pipeliner-max-num-stores", cl::desc("Maximum number of stores allwed in the target loop."), cl::Hidden, cl::init(200))
A command line argument to limit the number of store instructions in the target basic block.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, BatchAAResults &BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const SwingSchedulerDAG *SSD, bool PerformCheapCheck)
Returns true if there is a loop-carried order dependency from Src to Dst.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
Promote Memory to Register
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
Remove Loads Into Fake Uses
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
Add loop-carried chain dependencies.
void computeDependencies()
The main function to compute loop-carried order-dependencies.
const BitVector & getLoopCarried(unsigned Idx) const
LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, BatchAAResults *BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
AttributeList getAttributes() const
Return the attribute list for this Function.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isPostIncrement(const MachineInstr &MI) const override
Return true for post-incremented instructions.
DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override
Create machine specific model for scheduling.
bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const override
For instructions with a base and offset, return the position of the base register and offset operands...
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
Represents a single loop in the control flow graph.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PointerIntPair - This class implements a pair of a pointer and small integer.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind
These are the different kinds of scheduling dependencies.
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SmallPtrSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void dump() const override
LLVM_ABI void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
LLVM_ABI bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
typename vector_type::const_iterator iterator
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
const EdgesType & getInEdges(const SUnit *SU) const
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool enableMachinePipeliner() const
True if the subtarget should run MachinePipeliner.
virtual bool useDFAforSMS() const
Default to DFA for resource management, return false when target will use ProcResource in InstrSchedM...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
The main class in the implementation of the target independent window scheduler.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< DefNode * > Def
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
std::set< NodeId > NodeSet
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
@ WS_Force
Use window algorithm instead of SMS algorithm.
@ WS_On
Use window algorithm after SMS algorithm fails.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
unsigned MCRegUnit
Register units are used to compute register aliasing.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
LLVM_ABI char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
cl::opt< bool > SwpEnableCopyToPhi
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
int popcount(T Value) noexcept
Count the number of set bits in a value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This class holds an SUnit corresponding to a memory operation and other information related to the in...
const Value * MemOpValue
The value of a memory operand.
SmallVector< const Value *, 2 > UnderlyingObjs
bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const
int64_t MemOpOffset
The offset of a memory operand.
bool IsAllIdentified
True if all the underlying objects are identified.
SUnitWithMemInfo(SUnit *SU)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
uint64_t FuncUnits
Bitmask representing a set of functional units.
static constexpr LaneBitmask getNone()
Represents loop-carried dependencies.
SmallSetVector< SUnit *, 8 > OrderDep
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.