22#define DEBUG_TYPE "machine-scheduler"
126 case NoCand:
return "NOCAND";
128 case Latency:
return "LATENCY";
130 case Depth:
return "DEPTH";
144 if (TryVal < CandVal) {
148 if (TryVal > CandVal) {
161 if (TryVal > CandVal) {
165 if (TryVal < CandVal) {
179 NodeNum2Index[SU->
NodeNum] = SUnits.size();
180 SUnits.push_back(SU);
184void SIScheduleBlock::traceCandidate(
const SISchedCandidate &Cand) {
191void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
192 SISchedCandidate &TryCand) {
194 if (!Cand.isValid()) {
199 if (Cand.SGPRUsage > 60 &&
220 Cand.HasLowLatencyNonWaitedParent,
228 if (TryCand.IsLowLatency &&
238 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
243SUnit* SIScheduleBlock::pickNode() {
244 SISchedCandidate TopCand;
246 for (
SUnit* SU : TopReadySUs) {
247 SISchedCandidate TryCand;
248 std::vector<unsigned> pressure;
249 std::vector<unsigned> MaxPressure;
253 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
254 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
257 TryCand.HasLowLatencyNonWaitedParent =
258 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
259 tryCandidateTopDown(TopCand, TryCand);
260 if (TryCand.Reason !=
NoCand)
261 TopCand.setBest(TryCand);
274 for (
SUnit* SU : SUnits) {
275 if (!SU->NumPredsLeft)
276 TopReadySUs.push_back(SU);
279 while (!TopReadySUs.empty()) {
280 SUnit *SU = TopReadySUs[0];
281 ScheduledSUnits.push_back(SU);
294 UI =
MRI->def_instr_begin(Reg),
295 UE =
MRI->def_instr_end(); UI != UE; ++UI) {
297 if (
MI->isDebugValue())
300 if (InstSlot >=
First && InstSlot <=
Last)
318 for (
SUnit* SU : ScheduledSUnits) {
319 RPTracker.setPos(SU->getInstr());
324 RPTracker.closeRegion();
327 TopRPTracker.
addLiveRegs(RPTracker.getPressure().LiveInRegs);
328 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
331 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
332 if (RegMaskPair.RegUnit.isVirtual())
333 LiveInRegs.insert(RegMaskPair.RegUnit);
358 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
360 if (
Reg.isVirtual() &&
364 LiveOutRegs.insert(Reg);
385 initRegPressure(BeginBlock, EndBlock);
392 for (
SUnit* SU : SUnits) {
393 if (!SU->NumPredsLeft)
394 TopReadySUs.push_back(SU);
397 while (!TopReadySUs.empty()) {
398 SUnit *SU = pickNode();
399 ScheduledSUnits.push_back(SU);
406 InternalAdditionalPressure.resize(TopPressure.
MaxSetPressure.size());
410 assert(SUnits.size() == ScheduledSUnits.size() &&
411 TopReadySUs.empty());
412 for (
SUnit* SU : SUnits) {
414 SU->NumPredsLeft == 0);
421void SIScheduleBlock::undoSchedule() {
422 for (
SUnit* SU : SUnits) {
423 SU->isScheduled =
false;
424 for (
SDep& Succ : SU->Succs) {
426 undoReleaseSucc(SU, &Succ);
429 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
430 ScheduledSUnits.clear();
434void SIScheduleBlock::undoReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
444void SIScheduleBlock::releaseSucc(
SUnit *SU,
SDep *SuccEdge) {
453 dbgs() <<
"*** Scheduling failed! ***\n";
455 dbgs() <<
" has been released too many times!\n";
464void SIScheduleBlock::releaseSuccessors(
SUnit *SU,
bool InOrOutBlock) {
474 releaseSucc(SU, &Succ);
476 TopReadySUs.push_back(SuccSU);
480void SIScheduleBlock::nodeScheduled(
SUnit *SU) {
483 std::vector<SUnit *>::iterator
I =
llvm::find(TopReadySUs, SU);
484 if (
I == TopReadySUs.end()) {
485 dbgs() <<
"Data Structure Bug in SI Scheduler\n";
488 TopReadySUs.erase(
I);
490 releaseSuccessors(SU,
true);
493 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->
NodeNum]])
494 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
498 std::map<unsigned, unsigned>::iterator
I =
500 if (
I != NodeNum2Index.end())
501 HasLowLatencyNonWaitedParent[
I->second] = 1;
509 for (
SUnit* SU : SUnits) {
510 releaseSuccessors(SU,
false);
512 HighLatencyBlock =
true;
514 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
519 unsigned PredID = Pred->
getID();
523 if (PredID ==
P->getID())
526 Preds.push_back(Pred);
531 return PredID == S.first->getID();
533 "Loop in the Block Graph!");
538 unsigned SuccID = Succ->
getID();
541 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
542 if (SuccID == S.first->getID()) {
550 ++NumHighLatencySuccessors;
551 Succs.push_back(std::pair(Succ, Kind));
555 "Loop in the Block Graph!");
560 dbgs() <<
"Block (" <<
ID <<
")\n";
564 dbgs() <<
"\nContains High Latency Instruction: "
565 << HighLatencyBlock <<
'\n';
566 dbgs() <<
"\nDepends On:\n";
568 P->printDebug(
false);
571 dbgs() <<
"\nSuccessors:\n";
572 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
574 dbgs() <<
"(Data Dep) ";
575 S.first->printDebug(
false);
579 dbgs() <<
"LiveInPressure "
580 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
581 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
'\n';
582 dbgs() <<
"LiveOutPressure "
583 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
584 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n\n";
585 dbgs() <<
"LiveIns:\n";
586 for (
unsigned Reg : LiveInRegs)
589 dbgs() <<
"\nLiveOuts:\n";
590 for (
unsigned Reg : LiveOutRegs)
594 dbgs() <<
"\nInstructions:\n";
595 for (
const SUnit* SU : SUnits)
598 dbgs() <<
"///////////////////////\n";
609 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator
B =
610 Blocks.find(BlockVariant);
611 if (
B == Blocks.end()) {
613 createBlocksForVariant(BlockVariant);
615 scheduleInsideBlocks();
617 Res.
Blocks = CurrentBlocks;
620 Blocks[BlockVariant] = Res;
630 return CurrentBlocks[Node2CurrentBlock[SU->
NodeNum]]->getID() ==
ID;
633void SIScheduleBlockCreator::colorHighLatenciesAlone() {
634 unsigned DAGSize = DAG->
SUnits.size();
636 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
639 CurrentColoring[SU->
NodeNum] = NextReservedID++;
646 for (
const auto &PredDep : SU.
Preds) {
647 if (PredDep.getSUnit() == &FromSU &&
654void SIScheduleBlockCreator::colorHighLatenciesGroups() {
655 unsigned DAGSize = DAG->
SUnits.size();
656 unsigned NumHighLatencies = 0;
658 int Color = NextReservedID;
660 std::set<unsigned> FormingGroup;
662 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
668 if (NumHighLatencies == 0)
671 if (NumHighLatencies <= 6)
673 else if (NumHighLatencies <= 12)
681 unsigned CompatibleGroup =
true;
682 int ProposedColor = Color;
683 std::vector<int> AdditionalElements;
695 for (
unsigned j : FormingGroup) {
697 std::vector<int> SubGraph;
710 else if (SubGraph.size() > 5) {
712 CompatibleGroup =
false;
717 for (
unsigned k : SubGraph) {
723 (CurrentColoring[k] != ProposedColor &&
724 CurrentColoring[k] != 0)) {
725 CompatibleGroup =
false;
731 CompatibleGroup =
false;
735 if (!CompatibleGroup)
739 CompatibleGroup =
false;
750 if (CompatibleGroup) {
751 FormingGroup.insert(SU.
NodeNum);
752 for (
unsigned j : AdditionalElements)
753 CurrentColoring[
j] = ProposedColor;
754 CurrentColoring[SU.
NodeNum] = ProposedColor;
760 if (!CompatibleGroup) {
761 FormingGroup.clear();
762 Color = ++NextReservedID;
763 ProposedColor = Color;
764 FormingGroup.insert(SU.
NodeNum);
765 CurrentColoring[SU.
NodeNum] = ProposedColor;
767 }
else if (Count == GroupSize) {
768 FormingGroup.clear();
769 Color = ++NextReservedID;
770 ProposedColor = Color;
777void SIScheduleBlockCreator::colorComputeReservedDependencies() {
778 unsigned DAGSize = DAG->
SUnits.size();
779 std::map<std::set<unsigned>,
unsigned> ColorCombinations;
781 CurrentTopDownReservedDependencyColoring.clear();
782 CurrentBottomUpReservedDependencyColoring.clear();
784 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
785 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
792 std::set<unsigned> SUColors;
795 if (CurrentColoring[SU->
NodeNum]) {
796 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
805 if (CurrentTopDownReservedDependencyColoring[Pred->
NodeNum] > 0)
806 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->
NodeNum]);
809 if (SUColors.empty())
812 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
813 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
816 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
817 ColorCombinations.find(SUColors);
818 if (Pos != ColorCombinations.end()) {
819 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] = Pos->second;
821 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
823 ColorCombinations[SUColors] = NextNonReservedID++;
828 ColorCombinations.clear();
834 std::set<unsigned> SUColors;
837 if (CurrentColoring[SU->
NodeNum]) {
838 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
847 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0)
848 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum]);
851 if (SUColors.empty())
854 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
855 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
858 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
859 ColorCombinations.find(SUColors);
860 if (Pos != ColorCombinations.end()) {
861 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] = Pos->second;
863 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
865 ColorCombinations[SUColors] = NextNonReservedID++;
871void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
872 std::map<std::pair<unsigned, unsigned>,
unsigned> ColorCombinations;
878 std::pair<unsigned, unsigned> SUColors;
881 if (CurrentColoring[SU.
NodeNum])
884 SUColors.first = CurrentTopDownReservedDependencyColoring[SU.
NodeNum];
885 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.
NodeNum];
887 std::map<std::pair<unsigned, unsigned>,
unsigned>::iterator Pos =
888 ColorCombinations.find(SUColors);
889 if (Pos != ColorCombinations.end()) {
890 CurrentColoring[SU.
NodeNum] = Pos->second;
892 CurrentColoring[SU.
NodeNum] = NextNonReservedID;
893 ColorCombinations[SUColors] = NextNonReservedID++;
898void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
899 unsigned DAGSize = DAG->
SUnits.size();
900 std::vector<int> PendingColoring = CurrentColoring;
903 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
904 CurrentTopDownReservedDependencyColoring.size() == DAGSize);
907 if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
908 CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
909 *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
910 CurrentTopDownReservedDependencyColoring.end()) == 0)
915 std::set<unsigned> SUColors;
916 std::set<unsigned> SUColorsPending;
918 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
921 if (CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] > 0 ||
922 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] > 0)
929 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0 ||
930 CurrentTopDownReservedDependencyColoring[Succ->
NodeNum] > 0)
931 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
932 SUColorsPending.insert(PendingColoring[Succ->
NodeNum]);
937 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
938 PendingColoring[SU->
NodeNum] = *SUColors.begin();
941 PendingColoring[SU->
NodeNum] = NextNonReservedID++;
943 CurrentColoring = PendingColoring;
947void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
948 unsigned DAGSize = DAG->
SUnits.size();
949 unsigned PreviousColor;
950 std::set<unsigned> SeenColors;
955 PreviousColor = CurrentColoring[0];
957 for (
unsigned i = 1, e = DAGSize; i !=
e; ++i) {
959 unsigned CurrentColor = CurrentColoring[i];
960 unsigned PreviousColorSave = PreviousColor;
963 if (CurrentColor != PreviousColor)
964 SeenColors.insert(PreviousColor);
965 PreviousColor = CurrentColor;
967 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
970 if (SeenColors.find(CurrentColor) == SeenColors.end())
973 if (PreviousColorSave != CurrentColor)
974 CurrentColoring[i] = NextNonReservedID++;
976 CurrentColoring[i] = CurrentColoring[i-1];
980void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
981 unsigned DAGSize = DAG->
SUnits.size();
985 std::set<unsigned> SUColors;
987 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
999 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1001 if (SUColors.size() == 1)
1002 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1006void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1007 unsigned DAGSize = DAG->
SUnits.size();
1011 std::set<unsigned> SUColors;
1013 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1020 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1022 if (SUColors.size() == 1)
1023 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1027void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1028 unsigned DAGSize = DAG->
SUnits.size();
1032 std::set<unsigned> SUColors;
1034 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1041 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1043 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1044 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1048void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1049 unsigned DAGSize = DAG->
SUnits.size();
1050 std::map<unsigned, unsigned> ColorCount;
1054 unsigned color = CurrentColoring[SU->
NodeNum];
1055 ++ColorCount[color];
1060 unsigned color = CurrentColoring[SU->
NodeNum];
1061 std::set<unsigned> SUColors;
1063 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1066 if (ColorCount[color] > 1)
1073 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1075 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1076 --ColorCount[color];
1077 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1078 ++ColorCount[*SUColors.begin()];
1083void SIScheduleBlockCreator::cutHugeBlocks() {
1087void SIScheduleBlockCreator::regroupNoUserInstructions() {
1088 unsigned DAGSize = DAG->
SUnits.size();
1089 int GroupID = NextNonReservedID++;
1093 bool hasSuccessor =
false;
1095 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1102 hasSuccessor =
true;
1105 CurrentColoring[SU->
NodeNum] = GroupID;
1109void SIScheduleBlockCreator::colorExports() {
1110 unsigned ExportColor = NextNonReservedID++;
1135 "SUnit unexpectedly not representing an instruction!");
1151 for (
unsigned j : ExpGroup)
1152 CurrentColoring[
j] = ExportColor;
1156 unsigned DAGSize = DAG->
SUnits.size();
1157 std::map<unsigned,unsigned> RealID;
1159 CurrentBlocks.clear();
1160 CurrentColoring.clear();
1161 CurrentColoring.resize(DAGSize, 0);
1162 Node2CurrentBlock.clear();
1168 NextNonReservedID = DAGSize + 1;
1173 colorHighLatenciesGroups();
1175 colorHighLatenciesAlone();
1176 colorComputeReservedDependencies();
1177 colorAccordingToReservedDependencies();
1178 colorEndsAccordingToDependencies();
1180 colorForceConsecutiveOrderInGroup();
1181 regroupNoUserInstructions();
1182 colorMergeConstantLoadsNextGroup();
1183 colorMergeIfPossibleNextGroupOnlyForReserved();
1187 Node2CurrentBlock.resize(DAGSize, -1);
1188 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1190 unsigned Color = CurrentColoring[SU->
NodeNum];
1191 if (RealID.find(Color) == RealID.end()) {
1192 int ID = CurrentBlocks.size();
1193 BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG,
this,
ID));
1194 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1197 CurrentBlocks[RealID[Color]]->addUnit(SU);
1198 Node2CurrentBlock[SU->
NodeNum] = RealID[Color];
1202 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1204 int SUID = Node2CurrentBlock[i];
1209 if (Node2CurrentBlock[Succ->
NodeNum] != SUID)
1210 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->
NodeNum]],
1217 if (Node2CurrentBlock[Pred->
NodeNum] != SUID)
1218 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->
NodeNum]]);
1224 Block->finalizeUnits();
1226 dbgs() <<
"Blocks created:\n\n";
1228 Block->printDebug(
true);
1238 for (;
I !=
End; ++
I) {
1239 if (!
I->isDebugInstr())
1245void SIScheduleBlockCreator::topologicalSort() {
1246 unsigned DAGSize = CurrentBlocks.size();
1247 std::vector<int> WorkList;
1251 WorkList.reserve(DAGSize);
1252 TopDownIndex2Block.resize(DAGSize);
1253 TopDownBlock2Index.resize(DAGSize);
1254 BottomUpIndex2Block.resize(DAGSize);
1256 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1258 unsigned Degree =
Block->getSuccs().size();
1259 TopDownBlock2Index[i] = Degree;
1261 WorkList.push_back(i);
1266 while (!WorkList.empty()) {
1267 int i = WorkList.back();
1269 WorkList.pop_back();
1270 TopDownBlock2Index[i] = --
Id;
1271 TopDownIndex2Block[
Id] = i;
1273 if (!--TopDownBlock2Index[Pred->getID()])
1274 WorkList.push_back(Pred->getID());
1280 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1283 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
1284 "Wrong Top Down topological sorting");
1289 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1290 TopDownIndex2Block.rend());
1293void SIScheduleBlockCreator::scheduleInsideBlocks() {
1294 unsigned DAGSize = CurrentBlocks.size();
1300 LLVM_DEBUG(
dbgs() <<
"First phase: Fast scheduling for Reg Liveness\n");
1301 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1303 Block->fastSchedule();
1311 std::vector<MachineBasicBlock::iterator> PosOld;
1312 std::vector<MachineBasicBlock::iterator> PosNew;
1313 PosOld.reserve(DAG->
SUnits.size());
1314 PosNew.reserve(DAG->
SUnits.size());
1316 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1317 int BlockIndice = TopDownIndex2Block[i];
1319 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1321 for (
SUnit* SU : SUs) {
1324 PosOld.push_back(Pos);
1325 if (&*CurrentTopFastSched ==
MI) {
1326 PosNew.push_back(Pos);
1327 CurrentTopFastSched =
nextIfDebug(++CurrentTopFastSched,
1339 PosNew.push_back(CurrentTopFastSched);
1348 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1350 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1351 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1356 for (
unsigned i = PosOld.size(), e = 0; i != e; --i) {
1370 Block->printDebug(
true);
1374void SIScheduleBlockCreator::fillStats() {
1375 unsigned DAGSize = CurrentBlocks.size();
1377 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1378 int BlockIndice = TopDownIndex2Block[i];
1380 if (
Block->getPreds().empty())
1385 if (Depth < Pred->
Depth + Pred->getCost())
1386 Depth = Pred->Depth + Pred->getCost();
1392 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1393 int BlockIndice = BottomUpIndex2Block[i];
1395 if (
Block->getSuccs().empty())
1398 unsigned Height = 0;
1399 for (
const auto &Succ :
Block->getSuccs())
1400 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1401 Block->Height = Height;
1411 DAG(DAG), Variant(Variant),
Blocks(BlocksStruct.
Blocks),
1412 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1413 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1425 LiveOutRegsNumUsages.resize(Blocks.size());
1427 for (
unsigned Reg :
Block->getInRegs()) {
1431 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1432 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1434 if (RegPos != PredOutRegs.end()) {
1446 ++LiveOutRegsNumUsages[PredID][Reg];
1450 LastPosHighLatencyParentScheduled.resize(
Blocks.size(), 0);
1451 BlockNumPredsLeft.resize(
Blocks.size());
1452 BlockNumSuccsLeft.resize(
Blocks.size());
1454 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1456 BlockNumPredsLeft[i] =
Block->getPreds().size();
1457 BlockNumSuccsLeft[i] =
Block->getSuccs().size();
1461 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1467 std::set<unsigned> InRegs = DAG->
getInRegs();
1468 addLiveRegs(InRegs);
1474 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1478 const std::set<unsigned> &OutRegs =
Block->getOutRegs();
1480 if (OutRegs.find(Reg) == OutRegs.end())
1483 ++LiveOutRegsNumUsages[
ID][Reg];
1491 for (
unsigned Reg :
Block->getInRegs()) {
1494 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1495 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1497 if (RegPos != PredOutRegs.end()) {
1504 ++LiveRegsConsumers[Reg];
1508 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1510 if (BlockNumPredsLeft[i] == 0) {
1511 ReadyBlocks.push_back(
Block);
1516 BlocksScheduled.push_back(
Block);
1517 blockScheduled(
Block);
1521 : BlocksScheduled) {
1526bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1527 SIBlockSchedCandidate &TryCand) {
1528 if (!Cand.isValid()) {
1535 Cand.LastPosHighLatParentScheduled, TryCand, Cand,
Latency))
1542 TryCand, Cand,
Depth))
1545 Cand.NumHighLatencySuccessors,
1551bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1552 SIBlockSchedCandidate &TryCand) {
1553 if (!Cand.isValid()) {
1562 Cand.NumSuccessors > 0,
1574 SIBlockSchedCandidate Cand;
1575 std::vector<SIScheduleBlock*>::iterator Best;
1577 if (ReadyBlocks.empty())
1581 VregCurrentUsage, SregCurrentUsage);
1582 if (VregCurrentUsage > maxVregUsage)
1583 maxVregUsage = VregCurrentUsage;
1584 if (SregCurrentUsage > maxSregUsage)
1585 maxSregUsage = SregCurrentUsage;
1588 : ReadyBlocks)
dbgs()
1589 <<
Block->getID() <<
' ';
1590 dbgs() <<
"\nCurrent Live:\n";
1595 dbgs() <<
"Current VGPRs: " << VregCurrentUsage <<
'\n';
1596 dbgs() <<
"Current SGPRs: " << SregCurrentUsage <<
'\n';);
1598 Cand.Block =
nullptr;
1599 for (std::vector<SIScheduleBlock*>::iterator
I = ReadyBlocks.begin(),
1600 E = ReadyBlocks.end();
I !=
E; ++
I) {
1601 SIBlockSchedCandidate TryCand;
1603 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1604 TryCand.VGPRUsageDiff =
1605 checkRegUsageImpact(TryCand.Block->getInRegs(),
1606 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1607 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1608 TryCand.NumHighLatencySuccessors =
1609 TryCand.Block->getNumHighLatencySuccessors();
1610 TryCand.LastPosHighLatParentScheduled =
1611 (
unsigned int) std::max<int> (0,
1612 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1613 LastPosWaitedHighLatency);
1614 TryCand.Height = TryCand.Block->Height;
1616 if (VregCurrentUsage > 120 ||
1618 if (!tryCandidateRegUsage(Cand, TryCand) &&
1620 tryCandidateLatency(Cand, TryCand);
1622 if (!tryCandidateLatency(Cand, TryCand))
1623 tryCandidateRegUsage(Cand, TryCand);
1625 if (TryCand.Reason !=
NoCand) {
1626 Cand.setBest(TryCand);
1628 LLVM_DEBUG(
dbgs() <<
"Best Current Choice: " << Cand.Block->getID() <<
' '
1634 dbgs() <<
"Is a block with high latency instruction: "
1635 << (Cand.IsHighLatency ?
"yes\n" :
"no\n");
1636 dbgs() <<
"Position of last high latency dependency: "
1637 << Cand.LastPosHighLatParentScheduled <<
'\n';
1638 dbgs() <<
"VGPRUsageDiff: " << Cand.VGPRUsageDiff <<
'\n';
1642 ReadyBlocks.erase(Best);
1648void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1651 if (!
Reg.isVirtual())
1654 (void) LiveRegs.insert(Reg);
1659 std::set<unsigned> &Regs) {
1660 for (
unsigned Reg : Regs) {
1662 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1663 assert (Pos != LiveRegs.end() &&
1664 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
1665 LiveRegsConsumers[Reg] >= 1);
1666 --LiveRegsConsumers[
Reg];
1667 if (LiveRegsConsumers[Reg] == 0)
1668 LiveRegs.erase(Pos);
1672void SIScheduleBlockScheduler::releaseBlockSuccs(
SIScheduleBlock *Parent) {
1674 if (--BlockNumPredsLeft[
Block.first->getID()] == 0)
1675 ReadyBlocks.push_back(
Block.first);
1679 LastPosHighLatencyParentScheduled[
Block.first->getID()] = NumBlockScheduled;
1685 addLiveRegs(
Block->getOutRegs());
1686 releaseBlockSuccs(
Block);
1687 for (
const auto &RegP : LiveOutRegsNumUsages[
Block->getID()]) {
1689 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1690 LiveRegsConsumers[RegP.first] == 0);
1691 LiveRegsConsumers[RegP.first] += RegP.second;
1693 if (LastPosHighLatencyParentScheduled[
Block->getID()] >
1694 (
unsigned)LastPosWaitedHighLatency)
1695 LastPosWaitedHighLatency =
1696 LastPosHighLatencyParentScheduled[
Block->getID()];
1697 ++NumBlockScheduled;
1701SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1702 std::set<unsigned> &OutRegs) {
1703 std::vector<int> DiffSetPressure;
1708 if (!
Reg.isVirtual())
1710 if (LiveRegsConsumers[Reg] > 1)
1713 for (; PSetI.
isValid(); ++PSetI) {
1714 DiffSetPressure[*PSetI] -= PSetI.
getWeight();
1720 if (!
Reg.isVirtual())
1723 for (; PSetI.
isValid(); ++PSetI) {
1724 DiffSetPressure[*PSetI] += PSetI.
getWeight();
1728 return DiffSetPressure;
1734SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1735 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1738 std::vector<SIScheduleBlock*> ScheduledBlocks;
1741 ScheduledBlocks =
Scheduler.getBlocks();
1744 std::vector<SUnit*>
SUs =
Block->getScheduledUnits();
1768void SIScheduleDAGMI::topologicalSort() {
1781void SIScheduleDAGMI::moveLowLatencies() {
1782 unsigned DAGSize =
SUnits.size();
1783 int LastLowLatencyUser = -1;
1784 int LastLowLatencyPos = -1;
1786 for (
unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1788 bool IsLowLatencyUser =
false;
1789 unsigned MinPos = 0;
1794 IsLowLatencyUser =
true;
1798 unsigned PredPos = ScheduledSUnitsInv[Pred->
NodeNum];
1799 if (PredPos >= MinPos)
1800 MinPos = PredPos + 1;
1804 unsigned BestPos = LastLowLatencyUser + 1;
1805 if ((
int)BestPos <= LastLowLatencyPos)
1806 BestPos = LastLowLatencyPos + 1;
1807 if (BestPos < MinPos)
1810 for (
unsigned u = i;
u > BestPos; --
u) {
1811 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1812 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1814 ScheduledSUnits[BestPos] = SU->
NodeNum;
1815 ScheduledSUnitsInv[SU->
NodeNum] = BestPos;
1817 LastLowLatencyPos = BestPos;
1818 if (IsLowLatencyUser)
1819 LastLowLatencyUser = BestPos;
1820 }
else if (IsLowLatencyUser) {
1821 LastLowLatencyUser = i;
1825 bool CopyForLowLat =
false;
1831 CopyForLowLat =
true;
1837 for (
unsigned u = i;
u > MinPos; --
u) {
1838 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1839 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1841 ScheduledSUnits[MinPos] = SU->
NodeNum;
1842 ScheduledSUnitsInv[SU->
NodeNum] = MinPos;
1849 for (
unsigned i = 0, e =
SUnits.size(); i != e; ++i) {
1850 SUnits[i].isScheduled =
false;
1851 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1852 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1853 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1854 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1859template<
typename _Iterator>
void
1861 unsigned &VgprUsage,
unsigned &SgprUsage) {
1864 for (_Iterator RegI =
First; RegI !=
End; ++RegI) {
1867 if (!
Reg.isVirtual())
1870 for (; PSetI.
isValid(); ++PSetI) {
1871 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1873 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1905 SUnitsLinksBackup =
SUnits;
1914 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1920 bool OffsetIsScalable;
1921 if (SITII->getMemOperandWithOffset(*SU->
getInstr(), BaseLatOp, OffLatReg,
1922 OffsetIsScalable,
TRI))
1947 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1948 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1968 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1969 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1975 ScheduledSUnits = Best.
SUs;
1976 ScheduledSUnitsInv.resize(
SUnits.size());
1978 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1979 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
1989 for (
unsigned I : ScheduledSUnits) {
2003 dbgs() <<
"*** Final schedule for "
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
DenseMap< Block *, BlockRelaxAux > Blocks
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isDefBetween(unsigned Reg, SlotIndex First, SlotIndex Last, const MachineRegisterInfo *MRI, const LiveIntervals *LIS)
static const char * getReasonStr(SIScheduleCandReason Reason)
static bool hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU)
SI Machine Scheduler interface.
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineOperand class - Representation of each machine instruction operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
PSetIterator getPressureSets(Register RegUnit) const
Get an iterator over the pressure sets affected by the given physical or virtual register.
Iterate over the pressure sets affected by the given physical or virtual register.
unsigned getWeight() const
Track the current register pressure at some position in the instruction stream, and remember the high...
void setPos(MachineBasicBlock::const_iterator Pos)
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void advance()
Advance across the current instruction.
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Wrapper class representing virtual and physical registers.
@ Data
Regular data dependence (aka true-dependence).
bool isWeak() const
Tests if this a weak dependence.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
static bool isEXP(const MachineInstr &MI)
bool isHighLatencyDef(int Opc) const override
bool isLowLatencyInstruction(const MachineInstr &MI) const
bool isSUInBlock(SUnit *SU, unsigned ID)
SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
SIScheduleBlocks getBlocks(SISchedulerBlockCreatorVariant BlockVariant)
SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, SISchedulerBlockSchedulerVariant Variant, SIScheduleBlocks BlocksStruct)
ArrayRef< std::pair< SIScheduleBlock *, SIScheduleBlockLinkKind > > getSuccs() const
void addPred(SIScheduleBlock *Pred)
void printDebug(bool Full)
void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind)
void schedule(MachineBasicBlock::iterator BeginBlock, MachineBasicBlock::iterator EndBlock)
void addUnit(SUnit *SU)
Functions for Block construction.
bool isHighLatencyBlock()
void restoreSULinksLeft()
std::vector< int > BottomUpIndex2SU
std::vector< unsigned > IsHighLatencySU
std::vector< unsigned > LowLatencyOffset
std::vector< int > TopDownIndex2SU
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGTopologicalSort * GetTopo()
void fillVgprSgprCost(_Iterator First, _Iterator End, unsigned &VgprUsage, unsigned &SgprUsage)
MachineRegisterInfo * getMRI()
SIScheduleDAGMI(MachineSchedContext *C)
MachineBasicBlock::iterator getCurrentBottom()
std::vector< unsigned > IsLowLatencySU
MachineBasicBlock::iterator getCurrentTop()
std::set< unsigned > getInRegs()
MachineBasicBlock * getBB()
void initRPTracker(RegPressureTracker &RPTracker)
std::set< unsigned > getOutRegs()
~SIScheduleDAGMI() override
const TargetRegisterInfo * getTRI()
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
bool isScheduled
True once scheduled.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
void dumpNode(const SUnit &SU) const override
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
void dump() const override
RegPressureTracker TopRPTracker
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
std::vector< int > GetSubGraph(const SUnit &StartSU, const SUnit &TargetSU, bool &Success)
Returns an array of SUs that are both in the successor subtree of StartSU and in the predecessor subt...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
reverse_iterator rbegin()
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual unsigned getNumRegPressureSets() const =0
Get the number of dimensions of register pressure.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
static bool tryGreater(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
static bool tryLess(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > PrintDAGs
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
SISchedulerBlockSchedulerVariant
cl::opt< bool > ViewMISchedDAGs
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SISchedulerBlockCreatorVariant
@ LatenciesAlonePlusConsecutive
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
std::vector< unsigned > SUs
std::vector< int > TopDownIndex2Block
std::vector< SIScheduleBlock * > Blocks
std::vector< int > TopDownBlock2Index
SIScheduleCandReason Reason
void setRepeat(SIScheduleCandReason R)