22#define DEBUG_TYPE "machine-scheduler"
126 case NoCand:
return "NOCAND";
128 case Latency:
return "LATENCY";
130 case Depth:
return "DEPTH";
143 if (TryVal < CandVal) {
147 if (TryVal > CandVal) {
160 if (TryVal > CandVal) {
164 if (TryVal < CandVal) {
177 NodeNum2Index[SU->
NodeNum] = SUnits.size();
178 SUnits.push_back(SU);
182void SIScheduleBlock::traceCandidate(
const SISchedCandidate &Cand) {
189void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
190 SISchedCandidate &TryCand) {
192 if (!Cand.isValid()) {
197 if (Cand.SGPRUsage > 60 &&
218 Cand.HasLowLatencyNonWaitedParent,
226 if (TryCand.IsLowLatency &&
236 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
241SUnit* SIScheduleBlock::pickNode() {
242 SISchedCandidate TopCand;
244 for (
SUnit* SU : TopReadySUs) {
245 SISchedCandidate TryCand;
246 std::vector<unsigned> pressure;
247 std::vector<unsigned> MaxPressure;
251 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
252 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
255 TryCand.HasLowLatencyNonWaitedParent =
256 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
257 tryCandidateTopDown(TopCand, TryCand);
258 if (TryCand.Reason !=
NoCand)
259 TopCand.setBest(TryCand);
272 for (
SUnit* SU : SUnits) {
273 if (!SU->NumPredsLeft)
274 TopReadySUs.push_back(SU);
277 while (!TopReadySUs.empty()) {
278 SUnit *SU = TopReadySUs[0];
279 ScheduledSUnits.push_back(SU);
291 UI =
MRI->def_instr_begin(Reg),
292 UE =
MRI->def_instr_end(); UI != UE; ++UI) {
294 if (
MI->isDebugValue())
297 if (InstSlot >=
First && InstSlot <=
Last)
315 for (
SUnit* SU : ScheduledSUnits) {
316 RPTracker.setPos(SU->getInstr());
321 RPTracker.closeRegion();
324 TopRPTracker.
addLiveRegs(RPTracker.getPressure().LiveInRegs);
325 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
328 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
329 if (RegMaskPair.RegUnit.isVirtual())
330 LiveInRegs.insert(RegMaskPair.RegUnit);
355 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
357 if (
Reg.isVirtual() &&
361 LiveOutRegs.insert(Reg);
382 initRegPressure(BeginBlock, EndBlock);
389 for (
SUnit* SU : SUnits) {
390 if (!SU->NumPredsLeft)
391 TopReadySUs.push_back(SU);
394 while (!TopReadySUs.empty()) {
395 SUnit *SU = pickNode();
396 ScheduledSUnits.push_back(SU);
403 InternalAdditionalPressure.resize(TopPressure.
MaxSetPressure.size());
407 assert(SUnits.size() == ScheduledSUnits.size() &&
408 TopReadySUs.empty());
409 for (
SUnit* SU : SUnits) {
411 SU->NumPredsLeft == 0);
418void SIScheduleBlock::undoSchedule() {
419 for (
SUnit* SU : SUnits) {
420 SU->isScheduled =
false;
421 for (
SDep& Succ : SU->Succs) {
423 undoReleaseSucc(SU, &Succ);
426 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
427 ScheduledSUnits.clear();
431void SIScheduleBlock::undoReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
441void SIScheduleBlock::releaseSucc(
SUnit *SU,
SDep *SuccEdge) {
450 dbgs() <<
"*** Scheduling failed! ***\n";
452 dbgs() <<
" has been released too many times!\n";
461void SIScheduleBlock::releaseSuccessors(
SUnit *SU,
bool InOrOutBlock) {
471 releaseSucc(SU, &Succ);
473 TopReadySUs.push_back(SuccSU);
477void SIScheduleBlock::nodeScheduled(
SUnit *SU) {
480 std::vector<SUnit *>::iterator
I =
llvm::find(TopReadySUs, SU);
481 if (
I == TopReadySUs.end()) {
482 dbgs() <<
"Data Structure Bug in SI Scheduler\n";
485 TopReadySUs.erase(
I);
487 releaseSuccessors(SU,
true);
490 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->
NodeNum]])
491 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
495 std::map<unsigned, unsigned>::iterator
I =
497 if (
I != NodeNum2Index.end())
498 HasLowLatencyNonWaitedParent[
I->second] = 1;
506 for (
SUnit* SU : SUnits) {
507 releaseSuccessors(SU,
false);
509 HighLatencyBlock =
true;
511 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
516 unsigned PredID = Pred->
getID();
520 if (PredID ==
P->getID())
523 Preds.push_back(Pred);
528 return PredID == S.first->getID();
530 "Loop in the Block Graph!");
535 unsigned SuccID = Succ->
getID();
538 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
539 if (SuccID == S.first->getID()) {
547 ++NumHighLatencySuccessors;
548 Succs.emplace_back(Succ, Kind);
552 "Loop in the Block Graph!");
557 dbgs() <<
"Block (" <<
ID <<
")\n";
561 dbgs() <<
"\nContains High Latency Instruction: "
562 << HighLatencyBlock <<
'\n';
563 dbgs() <<
"\nDepends On:\n";
565 P->printDebug(
false);
568 dbgs() <<
"\nSuccessors:\n";
569 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
571 dbgs() <<
"(Data Dep) ";
572 S.first->printDebug(
false);
576 dbgs() <<
"LiveInPressure "
577 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
578 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
'\n';
579 dbgs() <<
"LiveOutPressure "
580 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
581 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n\n";
582 dbgs() <<
"LiveIns:\n";
586 dbgs() <<
"\nLiveOuts:\n";
591 dbgs() <<
"\nInstructions:\n";
592 for (
const SUnit* SU : SUnits)
595 dbgs() <<
"///////////////////////\n";
606 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator
B =
607 Blocks.find(BlockVariant);
608 if (
B == Blocks.end()) {
610 createBlocksForVariant(BlockVariant);
612 scheduleInsideBlocks();
614 Res.
Blocks = CurrentBlocks;
617 Blocks[BlockVariant] = Res;
626 return CurrentBlocks[Node2CurrentBlock[SU->
NodeNum]]->getID() ==
ID;
629void SIScheduleBlockCreator::colorHighLatenciesAlone() {
630 unsigned DAGSize = DAG->
SUnits.size();
632 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
635 CurrentColoring[SU->
NodeNum] = NextReservedID++;
642 for (
const auto &PredDep : SU.
Preds) {
643 if (PredDep.getSUnit() == &FromSU &&
650void SIScheduleBlockCreator::colorHighLatenciesGroups() {
651 unsigned DAGSize = DAG->
SUnits.size();
652 unsigned NumHighLatencies = 0;
654 int Color = NextReservedID;
656 std::set<unsigned> FormingGroup;
658 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
664 if (NumHighLatencies == 0)
667 if (NumHighLatencies <= 6)
669 else if (NumHighLatencies <= 12)
677 unsigned CompatibleGroup =
true;
678 int ProposedColor = Color;
679 std::vector<int> AdditionalElements;
691 for (
unsigned j : FormingGroup) {
693 std::vector<int> SubGraph;
706 if (SubGraph.size() > 5) {
708 CompatibleGroup =
false;
712 for (
unsigned k : SubGraph) {
718 CurrentColoring[k] != 0)) {
719 CompatibleGroup =
false;
725 CompatibleGroup =
false;
729 if (!CompatibleGroup)
733 CompatibleGroup =
false;
743 if (CompatibleGroup) {
744 FormingGroup.insert(SU.
NodeNum);
745 for (
unsigned j : AdditionalElements)
746 CurrentColoring[
j] = ProposedColor;
747 CurrentColoring[SU.
NodeNum] = ProposedColor;
753 if (!CompatibleGroup) {
754 FormingGroup.clear();
755 Color = ++NextReservedID;
756 ProposedColor = Color;
757 FormingGroup.insert(SU.
NodeNum);
758 CurrentColoring[SU.
NodeNum] = ProposedColor;
760 }
else if (Count == GroupSize) {
761 FormingGroup.clear();
762 Color = ++NextReservedID;
763 ProposedColor = Color;
770void SIScheduleBlockCreator::colorComputeReservedDependencies() {
771 unsigned DAGSize = DAG->
SUnits.size();
772 std::map<std::set<unsigned>,
unsigned> ColorCombinations;
774 CurrentTopDownReservedDependencyColoring.clear();
775 CurrentBottomUpReservedDependencyColoring.clear();
777 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
778 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
785 std::set<unsigned> SUColors;
788 if (CurrentColoring[SU->
NodeNum]) {
789 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
798 if (CurrentTopDownReservedDependencyColoring[Pred->
NodeNum] > 0)
799 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->
NodeNum]);
802 if (SUColors.empty())
805 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
806 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
809 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
810 ColorCombinations.find(SUColors);
811 if (Pos != ColorCombinations.end()) {
812 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] = Pos->second;
814 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
816 ColorCombinations[SUColors] = NextNonReservedID++;
821 ColorCombinations.clear();
827 std::set<unsigned> SUColors;
830 if (CurrentColoring[SU->
NodeNum]) {
831 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
840 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0)
841 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum]);
844 if (SUColors.empty())
847 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
848 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
851 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
852 ColorCombinations.find(SUColors);
853 if (Pos != ColorCombinations.end()) {
854 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] = Pos->second;
856 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
858 ColorCombinations[SUColors] = NextNonReservedID++;
864void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
865 std::map<std::pair<unsigned, unsigned>,
unsigned> ColorCombinations;
871 std::pair<unsigned, unsigned> SUColors;
874 if (CurrentColoring[SU.
NodeNum])
877 SUColors.first = CurrentTopDownReservedDependencyColoring[SU.
NodeNum];
878 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.
NodeNum];
881 ColorCombinations.try_emplace(SUColors, NextNonReservedID);
882 CurrentColoring[SU.
NodeNum] = Pos->second;
888void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
889 unsigned DAGSize = DAG->
SUnits.size();
890 std::vector<int> PendingColoring = CurrentColoring;
893 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
894 CurrentTopDownReservedDependencyColoring.size() == DAGSize);
903 std::set<unsigned> SUColors;
904 std::set<unsigned> SUColorsPending;
906 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
909 if (CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] > 0 ||
910 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] > 0)
917 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0 ||
918 CurrentTopDownReservedDependencyColoring[Succ->
NodeNum] > 0)
919 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
920 SUColorsPending.insert(PendingColoring[Succ->
NodeNum]);
925 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
926 PendingColoring[SU->
NodeNum] = *SUColors.begin();
929 PendingColoring[SU->
NodeNum] = NextNonReservedID++;
931 CurrentColoring = PendingColoring;
935void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
936 unsigned DAGSize = DAG->
SUnits.size();
937 unsigned PreviousColor;
938 std::set<unsigned> SeenColors;
943 PreviousColor = CurrentColoring[0];
945 for (
unsigned i = 1, e = DAGSize; i !=
e; ++i) {
947 unsigned CurrentColor = CurrentColoring[i];
948 unsigned PreviousColorSave = PreviousColor;
951 if (CurrentColor != PreviousColor)
952 SeenColors.insert(PreviousColor);
953 PreviousColor = CurrentColor;
955 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
958 if (SeenColors.find(CurrentColor) == SeenColors.end())
961 if (PreviousColorSave != CurrentColor)
962 CurrentColoring[i] = NextNonReservedID++;
964 CurrentColoring[i] = CurrentColoring[i-1];
968void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
969 unsigned DAGSize = DAG->
SUnits.size();
973 std::set<unsigned> SUColors;
975 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
987 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
989 if (SUColors.size() == 1)
990 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
994void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
995 unsigned DAGSize = DAG->
SUnits.size();
999 std::set<unsigned> SUColors;
1001 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1008 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1010 if (SUColors.size() == 1)
1011 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1015void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1016 unsigned DAGSize = DAG->
SUnits.size();
1020 std::set<unsigned> SUColors;
1022 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1029 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1031 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1032 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1036void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1037 unsigned DAGSize = DAG->
SUnits.size();
1038 std::map<unsigned, unsigned> ColorCount;
1042 unsigned color = CurrentColoring[SU->
NodeNum];
1043 ++ColorCount[color];
1048 unsigned color = CurrentColoring[SU->
NodeNum];
1049 std::set<unsigned> SUColors;
1051 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1054 if (ColorCount[color] > 1)
1061 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1063 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1064 --ColorCount[color];
1065 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1066 ++ColorCount[*SUColors.begin()];
1071void SIScheduleBlockCreator::cutHugeBlocks() {
1075void SIScheduleBlockCreator::regroupNoUserInstructions() {
1076 unsigned DAGSize = DAG->
SUnits.size();
1077 int GroupID = NextNonReservedID++;
1081 bool hasSuccessor =
false;
1083 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1090 hasSuccessor =
true;
1093 CurrentColoring[SU->
NodeNum] = GroupID;
1097void SIScheduleBlockCreator::colorExports() {
1098 unsigned ExportColor = NextNonReservedID++;
1123 "SUnit unexpectedly not representing an instruction!");
1139 for (
unsigned j : ExpGroup)
1140 CurrentColoring[
j] = ExportColor;
1144 unsigned DAGSize = DAG->
SUnits.size();
1145 std::map<unsigned,unsigned> RealID;
1147 CurrentBlocks.clear();
1148 CurrentColoring.clear();
1149 CurrentColoring.resize(DAGSize, 0);
1150 Node2CurrentBlock.clear();
1156 NextNonReservedID = DAGSize + 1;
1161 colorHighLatenciesGroups();
1163 colorHighLatenciesAlone();
1164 colorComputeReservedDependencies();
1165 colorAccordingToReservedDependencies();
1166 colorEndsAccordingToDependencies();
1168 colorForceConsecutiveOrderInGroup();
1169 regroupNoUserInstructions();
1170 colorMergeConstantLoadsNextGroup();
1171 colorMergeIfPossibleNextGroupOnlyForReserved();
1175 Node2CurrentBlock.resize(DAGSize, -1);
1176 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1178 unsigned Color = CurrentColoring[SU->
NodeNum];
1179 if (RealID.find(Color) == RealID.end()) {
1180 int ID = CurrentBlocks.size();
1181 BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG,
this,
ID));
1182 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1185 CurrentBlocks[RealID[Color]]->addUnit(SU);
1186 Node2CurrentBlock[SU->
NodeNum] = RealID[Color];
1190 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1192 int SUID = Node2CurrentBlock[i];
1197 if (Node2CurrentBlock[Succ->
NodeNum] != SUID)
1198 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->
NodeNum]],
1205 if (Node2CurrentBlock[Pred->
NodeNum] != SUID)
1206 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->
NodeNum]]);
1212 Block->finalizeUnits();
1214 dbgs() <<
"Blocks created:\n\n";
1216 Block->printDebug(
true);
1226 for (;
I !=
End; ++
I) {
1227 if (!
I->isDebugInstr())
1233void SIScheduleBlockCreator::topologicalSort() {
1234 unsigned DAGSize = CurrentBlocks.size();
1235 std::vector<int> WorkList;
1239 WorkList.reserve(DAGSize);
1240 TopDownIndex2Block.resize(DAGSize);
1241 TopDownBlock2Index.resize(DAGSize);
1242 BottomUpIndex2Block.resize(DAGSize);
1244 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1246 unsigned Degree =
Block->getSuccs().size();
1247 TopDownBlock2Index[i] = Degree;
1249 WorkList.push_back(i);
1254 while (!WorkList.empty()) {
1255 int i = WorkList.back();
1257 WorkList.pop_back();
1258 TopDownBlock2Index[i] = --
Id;
1259 TopDownIndex2Block[
Id] = i;
1261 if (!--TopDownBlock2Index[Pred->getID()])
1262 WorkList.push_back(Pred->getID());
1268 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1271 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
1272 "Wrong Top Down topological sorting");
1277 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1278 TopDownIndex2Block.rend());
1281void SIScheduleBlockCreator::scheduleInsideBlocks() {
1282 unsigned DAGSize = CurrentBlocks.size();
1288 LLVM_DEBUG(
dbgs() <<
"First phase: Fast scheduling for Reg Liveness\n");
1289 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1291 Block->fastSchedule();
1299 std::vector<MachineBasicBlock::iterator> PosOld;
1300 std::vector<MachineBasicBlock::iterator> PosNew;
1301 PosOld.reserve(DAG->
SUnits.size());
1302 PosNew.reserve(DAG->
SUnits.size());
1304 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1305 int BlockIndice = TopDownIndex2Block[i];
1307 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1309 for (
SUnit* SU : SUs) {
1312 PosOld.push_back(Pos);
1313 if (&*CurrentTopFastSched ==
MI) {
1314 PosNew.push_back(Pos);
1315 CurrentTopFastSched =
nextIfDebug(++CurrentTopFastSched,
1327 PosNew.push_back(CurrentTopFastSched);
1336 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1338 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1339 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1344 for (
unsigned i = PosOld.size(), e = 0; i != e; --i) {
1358 Block->printDebug(
true);
1362void SIScheduleBlockCreator::fillStats() {
1363 unsigned DAGSize = CurrentBlocks.size();
1365 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1366 int BlockIndice = TopDownIndex2Block[i];
1368 if (
Block->getPreds().empty())
1373 if (Depth < Pred->
Depth + Pred->getCost())
1374 Depth = Pred->Depth + Pred->getCost();
1380 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1381 int BlockIndice = BottomUpIndex2Block[i];
1383 if (
Block->getSuccs().empty())
1386 unsigned Height = 0;
1387 for (
const auto &Succ :
Block->getSuccs())
1388 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1389 Block->Height = Height;
1399 DAG(DAG), Variant(Variant),
Blocks(BlocksStruct.
Blocks),
1400 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1401 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1413 LiveOutRegsNumUsages.resize(Blocks.size());
1419 std::set<Register> PredOutRegs = Pred->getOutRegs();
1420 std::set<Register>::iterator RegPos = PredOutRegs.find(Reg);
1422 if (RegPos != PredOutRegs.end()) {
1434 ++LiveOutRegsNumUsages[PredID][Reg];
1438 LastPosHighLatencyParentScheduled.resize(
Blocks.size(), 0);
1439 BlockNumPredsLeft.resize(
Blocks.size());
1440 BlockNumSuccsLeft.resize(
Blocks.size());
1442 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1444 BlockNumPredsLeft[i] =
Block->getPreds().size();
1445 BlockNumSuccsLeft[i] =
Block->getSuccs().size();
1449 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1455 std::set<Register> InRegs = DAG->
getInRegs();
1456 addLiveRegs(InRegs);
1462 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1466 const std::set<Register> &OutRegs =
Block->getOutRegs();
1468 if (OutRegs.find(Reg) == OutRegs.end())
1471 ++LiveOutRegsNumUsages[
ID][Reg];
1482 std::set<Register> PredOutRegs = Pred->getOutRegs();
1483 std::set<Register>::iterator RegPos = PredOutRegs.find(Reg);
1485 if (RegPos != PredOutRegs.end()) {
1492 ++LiveRegsConsumers[Reg];
1496 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1498 if (BlockNumPredsLeft[i] == 0) {
1499 ReadyBlocks.push_back(
Block);
1504 BlocksScheduled.push_back(
Block);
1505 blockScheduled(
Block);
1509 : BlocksScheduled) {
1514bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1515 SIBlockSchedCandidate &TryCand) {
1516 if (!Cand.isValid()) {
1523 Cand.LastPosHighLatParentScheduled, TryCand, Cand,
Latency))
1530 TryCand, Cand,
Depth))
1533 Cand.NumHighLatencySuccessors,
1539bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1540 SIBlockSchedCandidate &TryCand) {
1541 if (!Cand.isValid()) {
1550 Cand.NumSuccessors > 0,
1562 SIBlockSchedCandidate Cand;
1563 std::vector<SIScheduleBlock*>::iterator Best;
1565 if (ReadyBlocks.empty())
1569 VregCurrentUsage, SregCurrentUsage);
1570 if (VregCurrentUsage > maxVregUsage)
1571 maxVregUsage = VregCurrentUsage;
1572 if (SregCurrentUsage > maxSregUsage)
1573 maxSregUsage = SregCurrentUsage;
1577 dbgs() <<
"\nCurrent Live:\n";
1581 dbgs() <<
"Current VGPRs: " << VregCurrentUsage <<
'\n';
1582 dbgs() <<
"Current SGPRs: " << SregCurrentUsage <<
'\n';);
1584 Cand.Block =
nullptr;
1585 for (std::vector<SIScheduleBlock*>::iterator
I = ReadyBlocks.begin(),
1586 E = ReadyBlocks.end();
I != E; ++
I) {
1587 SIBlockSchedCandidate TryCand;
1589 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1590 TryCand.VGPRUsageDiff =
1591 checkRegUsageImpact(TryCand.Block->getInRegs(),
1592 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1593 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1594 TryCand.NumHighLatencySuccessors =
1595 TryCand.Block->getNumHighLatencySuccessors();
1596 TryCand.LastPosHighLatParentScheduled =
1597 (
unsigned int) std::max<int> (0,
1598 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1599 LastPosWaitedHighLatency);
1600 TryCand.Height = TryCand.Block->Height;
1602 if (VregCurrentUsage > 120 ||
1604 if (!tryCandidateRegUsage(Cand, TryCand) &&
1606 tryCandidateLatency(Cand, TryCand);
1608 if (!tryCandidateLatency(Cand, TryCand))
1609 tryCandidateRegUsage(Cand, TryCand);
1611 if (TryCand.Reason !=
NoCand) {
1612 Cand.setBest(TryCand);
1614 LLVM_DEBUG(
dbgs() <<
"Best Current Choice: " << Cand.Block->getID() <<
' '
1620 dbgs() <<
"Is a block with high latency instruction: "
1621 << (Cand.IsHighLatency ?
"yes\n" :
"no\n");
1622 dbgs() <<
"Position of last high latency dependency: "
1623 << Cand.LastPosHighLatParentScheduled <<
'\n';
1624 dbgs() <<
"VGPRUsageDiff: " << Cand.VGPRUsageDiff <<
'\n';
1628 ReadyBlocks.erase(Best);
1634void SIScheduleBlockScheduler::addLiveRegs(std::set<Register> &Regs) {
1637 if (!
Reg.isVirtual())
1640 (void) LiveRegs.insert(Reg);
1645 std::set<Register> &Regs) {
1648 std::set<Register>::iterator Pos = LiveRegs.find(Reg);
1649 assert (Pos != LiveRegs.end() &&
1650 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
1651 LiveRegsConsumers[Reg] >= 1);
1652 --LiveRegsConsumers[
Reg];
1653 if (LiveRegsConsumers[Reg] == 0)
1654 LiveRegs.erase(Pos);
1658void SIScheduleBlockScheduler::releaseBlockSuccs(
SIScheduleBlock *Parent) {
1660 if (--BlockNumPredsLeft[
Block.first->getID()] == 0)
1661 ReadyBlocks.push_back(
Block.first);
1665 LastPosHighLatencyParentScheduled[
Block.first->getID()] = NumBlockScheduled;
1671 addLiveRegs(
Block->getOutRegs());
1672 releaseBlockSuccs(
Block);
1673 for (
const auto &RegP : LiveOutRegsNumUsages[
Block->getID()]) {
1675 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1676 LiveRegsConsumers[RegP.first] == 0);
1677 LiveRegsConsumers[RegP.first] += RegP.second;
1679 if (LastPosHighLatencyParentScheduled[
Block->getID()] >
1680 (
unsigned)LastPosWaitedHighLatency)
1681 LastPosWaitedHighLatency =
1682 LastPosHighLatencyParentScheduled[
Block->getID()];
1683 ++NumBlockScheduled;
1687SIScheduleBlockScheduler::checkRegUsageImpact(std::set<Register> &InRegs,
1688 std::set<Register> &OutRegs) {
1689 std::vector<int> DiffSetPressure;
1694 if (!
Reg.isVirtual())
1696 if (LiveRegsConsumers[Reg] > 1)
1699 for (; PSetI.
isValid(); ++PSetI) {
1700 DiffSetPressure[*PSetI] -= PSetI.
getWeight();
1706 if (!
Reg.isVirtual())
1709 for (; PSetI.
isValid(); ++PSetI) {
1710 DiffSetPressure[*PSetI] += PSetI.
getWeight();
1714 return DiffSetPressure;
1720SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1721 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1724 std::vector<SIScheduleBlock*> ScheduledBlocks;
1727 ScheduledBlocks =
Scheduler.getBlocks();
1730 std::vector<SUnit*>
SUs =
Block->getScheduledUnits();
1754void SIScheduleDAGMI::topologicalSort() {
1767void SIScheduleDAGMI::moveLowLatencies() {
1768 unsigned DAGSize =
SUnits.size();
1769 int LastLowLatencyUser = -1;
1770 int LastLowLatencyPos = -1;
1772 for (
unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1774 bool IsLowLatencyUser =
false;
1775 unsigned MinPos = 0;
1780 IsLowLatencyUser =
true;
1784 unsigned PredPos = ScheduledSUnitsInv[Pred->
NodeNum];
1785 if (PredPos >= MinPos)
1786 MinPos = PredPos + 1;
1790 unsigned BestPos = LastLowLatencyUser + 1;
1791 if ((
int)BestPos <= LastLowLatencyPos)
1792 BestPos = LastLowLatencyPos + 1;
1793 if (BestPos < MinPos)
1796 for (
unsigned u = i;
u > BestPos; --
u) {
1797 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1798 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1800 ScheduledSUnits[BestPos] = SU->
NodeNum;
1801 ScheduledSUnitsInv[SU->
NodeNum] = BestPos;
1803 LastLowLatencyPos = BestPos;
1804 if (IsLowLatencyUser)
1805 LastLowLatencyUser = BestPos;
1806 }
else if (IsLowLatencyUser) {
1807 LastLowLatencyUser = i;
1811 bool CopyForLowLat =
false;
1817 CopyForLowLat =
true;
1823 for (
unsigned u = i;
u > MinPos; --
u) {
1824 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1825 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1827 ScheduledSUnits[MinPos] = SU->
NodeNum;
1828 ScheduledSUnitsInv[SU->
NodeNum] = MinPos;
1835 for (
unsigned i = 0, e =
SUnits.size(); i != e; ++i) {
1836 SUnits[i].isScheduled =
false;
1837 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1838 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1839 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1840 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1845template<
typename _Iterator>
void
1847 unsigned &VgprUsage,
unsigned &SgprUsage) {
1850 for (_Iterator RegI =
First; RegI !=
End; ++RegI) {
1853 if (!
Reg.isVirtual())
1856 for (; PSetI.
isValid(); ++PSetI) {
1857 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1859 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1891 SUnitsLinksBackup =
SUnits;
1900 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1906 bool OffsetIsScalable;
1907 if (SITII->getMemOperandWithOffset(*SU->
getInstr(), BaseLatOp, OffLatReg,
1908 OffsetIsScalable,
TRI))
1933 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1934 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1954 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1955 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1961 ScheduledSUnits = Best.
SUs;
1962 ScheduledSUnitsInv.resize(
SUnits.size());
1964 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1965 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
1975 for (
unsigned I : ScheduledSUnits) {
1989 dbgs() <<
"*** Final schedule for "
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DenseMap< Block *, BlockRelaxAux > Blocks
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const char * getReasonStr(SIScheduleCandReason Reason)
static bool hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU)
static bool isDefBetween(Register Reg, SlotIndex First, SlotIndex Last, const MachineRegisterInfo *MRI, const LiveIntervals *LIS)
SI Machine Scheduler interface.
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineOperand class - Representation of each machine instruction operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
PSetIterator getPressureSets(Register RegUnit) const
Get an iterator over the pressure sets affected by the given physical or virtual register.
Iterate over the pressure sets affected by the given physical or virtual register.
unsigned getWeight() const
Track the current register pressure at some position in the instruction stream, and remember the high...
void setPos(MachineBasicBlock::const_iterator Pos)
void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void advance()
Advance across the current instruction.
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
Wrapper class representing virtual and physical registers.
@ Data
Regular data dependence (aka true-dependence).
bool isWeak() const
Tests if this a weak dependence.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
static bool isEXP(const MachineInstr &MI)
bool isHighLatencyDef(int Opc) const override
bool isLowLatencyInstruction(const MachineInstr &MI) const
bool isSUInBlock(SUnit *SU, unsigned ID)
SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
SIScheduleBlocks getBlocks(SISchedulerBlockCreatorVariant BlockVariant)
SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, SISchedulerBlockSchedulerVariant Variant, SIScheduleBlocks BlocksStruct)
ArrayRef< std::pair< SIScheduleBlock *, SIScheduleBlockLinkKind > > getSuccs() const
void addPred(SIScheduleBlock *Pred)
void printDebug(bool Full)
void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind)
void schedule(MachineBasicBlock::iterator BeginBlock, MachineBasicBlock::iterator EndBlock)
void addUnit(SUnit *SU)
Functions for Block construction.
bool isHighLatencyBlock()
void restoreSULinksLeft()
std::set< Register > getInRegs()
std::vector< int > BottomUpIndex2SU
std::vector< unsigned > IsHighLatencySU
std::vector< unsigned > LowLatencyOffset
std::vector< int > TopDownIndex2SU
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGTopologicalSort * GetTopo()
void fillVgprSgprCost(_Iterator First, _Iterator End, unsigned &VgprUsage, unsigned &SgprUsage)
MachineRegisterInfo * getMRI()
SIScheduleDAGMI(MachineSchedContext *C)
MachineBasicBlock::iterator getCurrentBottom()
std::vector< unsigned > IsLowLatencySU
MachineBasicBlock::iterator getCurrentTop()
MachineBasicBlock * getBB()
void initRPTracker(RegPressureTracker &RPTracker)
std::set< unsigned > getOutRegs()
~SIScheduleDAGMI() override
const TargetRegisterInfo * getTRI()
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
bool isScheduled
True once scheduled.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
void dumpNode(const SUnit &SU) const override
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
void dump() const override
RegPressureTracker TopRPTracker
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
std::vector< int > GetSubGraph(const SUnit &StartSU, const SUnit &TargetSU, bool &Success)
Returns an array of SUs that are both in the successor subtree of StartSU and in the predecessor subt...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
reverse_iterator rbegin()
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual unsigned getNumRegPressureSets() const =0
Get the number of dimensions of register pressure.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
static bool tryGreater(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
static bool tryLess(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > PrintDAGs
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
SISchedulerBlockSchedulerVariant
cl::opt< bool > ViewMISchedDAGs
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SISchedulerBlockCreatorVariant
@ LatenciesAlonePlusConsecutive
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
std::vector< unsigned > SUs
std::vector< int > TopDownIndex2Block
std::vector< SIScheduleBlock * > Blocks
std::vector< int > TopDownBlock2Index
SIScheduleCandReason Reason
void setRepeat(SIScheduleCandReason R)