22#define DEBUG_TYPE "machine-scheduler"
126 case NoCand:
return "NOCAND";
128 case Latency:
return "LATENCY";
130 case Depth:
return "DEPTH";
144 if (TryVal < CandVal) {
148 if (TryVal > CandVal) {
161 if (TryVal > CandVal) {
165 if (TryVal < CandVal) {
179 NodeNum2Index[SU->
NodeNum] = SUnits.size();
180 SUnits.push_back(SU);
184void SIScheduleBlock::traceCandidate(
const SISchedCandidate &Cand) {
191void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
192 SISchedCandidate &TryCand) {
194 if (!Cand.isValid()) {
199 if (Cand.SGPRUsage > 60 &&
220 Cand.HasLowLatencyNonWaitedParent,
228 if (TryCand.IsLowLatency &&
238 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
243SUnit* SIScheduleBlock::pickNode() {
244 SISchedCandidate TopCand;
246 for (
SUnit* SU : TopReadySUs) {
247 SISchedCandidate TryCand;
248 std::vector<unsigned> pressure;
249 std::vector<unsigned> MaxPressure;
253 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
254 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
257 TryCand.HasLowLatencyNonWaitedParent =
258 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
259 tryCandidateTopDown(TopCand, TryCand);
260 if (TryCand.Reason !=
NoCand)
261 TopCand.setBest(TryCand);
274 for (
SUnit* SU : SUnits) {
275 if (!SU->NumPredsLeft)
276 TopReadySUs.push_back(SU);
279 while (!TopReadySUs.empty()) {
280 SUnit *SU = TopReadySUs[0];
281 ScheduledSUnits.push_back(SU);
294 UI =
MRI->def_instr_begin(Reg),
295 UE =
MRI->def_instr_end(); UI != UE; ++UI) {
297 if (
MI->isDebugValue())
300 if (InstSlot >=
First && InstSlot <=
Last)
318 for (
SUnit* SU : ScheduledSUnits) {
319 RPTracker.setPos(SU->getInstr());
324 RPTracker.closeRegion();
327 TopRPTracker.
addLiveRegs(RPTracker.getPressure().LiveInRegs);
328 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
331 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
332 if (RegMaskPair.RegUnit.isVirtual())
333 LiveInRegs.insert(RegMaskPair.RegUnit);
358 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
360 if (
Reg.isVirtual() &&
364 LiveOutRegs.insert(Reg);
385 initRegPressure(BeginBlock, EndBlock);
392 for (
SUnit* SU : SUnits) {
393 if (!SU->NumPredsLeft)
394 TopReadySUs.push_back(SU);
397 while (!TopReadySUs.empty()) {
398 SUnit *SU = pickNode();
399 ScheduledSUnits.push_back(SU);
406 InternalAdditionalPressure.resize(TopPressure.
MaxSetPressure.size());
410 assert(SUnits.size() == ScheduledSUnits.size() &&
411 TopReadySUs.empty());
412 for (
SUnit* SU : SUnits) {
414 SU->NumPredsLeft == 0);
421void SIScheduleBlock::undoSchedule() {
422 for (
SUnit* SU : SUnits) {
423 SU->isScheduled =
false;
424 for (
SDep& Succ : SU->Succs) {
426 undoReleaseSucc(SU, &Succ);
429 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
430 ScheduledSUnits.clear();
434void SIScheduleBlock::undoReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
444void SIScheduleBlock::releaseSucc(
SUnit *SU,
SDep *SuccEdge) {
453 dbgs() <<
"*** Scheduling failed! ***\n";
455 dbgs() <<
" has been released too many times!\n";
464void SIScheduleBlock::releaseSuccessors(
SUnit *SU,
bool InOrOutBlock) {
474 releaseSucc(SU, &Succ);
476 TopReadySUs.push_back(SuccSU);
480void SIScheduleBlock::nodeScheduled(
SUnit *SU) {
483 std::vector<SUnit *>::iterator
I =
llvm::find(TopReadySUs, SU);
484 if (
I == TopReadySUs.end()) {
485 dbgs() <<
"Data Structure Bug in SI Scheduler\n";
488 TopReadySUs.erase(
I);
490 releaseSuccessors(SU,
true);
493 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->
NodeNum]])
494 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
498 std::map<unsigned, unsigned>::iterator
I =
500 if (
I != NodeNum2Index.end())
501 HasLowLatencyNonWaitedParent[
I->second] = 1;
509 for (
SUnit* SU : SUnits) {
510 releaseSuccessors(SU,
false);
512 HighLatencyBlock =
true;
514 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
519 unsigned PredID = Pred->
getID();
523 if (PredID ==
P->getID())
526 Preds.push_back(Pred);
531 return PredID == S.first->getID();
533 "Loop in the Block Graph!");
538 unsigned SuccID = Succ->
getID();
541 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
542 if (SuccID == S.first->getID()) {
550 ++NumHighLatencySuccessors;
551 Succs.push_back(std::pair(Succ, Kind));
555 "Loop in the Block Graph!");
560 dbgs() <<
"Block (" <<
ID <<
")\n";
564 dbgs() <<
"\nContains High Latency Instruction: "
565 << HighLatencyBlock <<
'\n';
566 dbgs() <<
"\nDepends On:\n";
568 P->printDebug(
false);
571 dbgs() <<
"\nSuccessors:\n";
572 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
574 dbgs() <<
"(Data Dep) ";
575 S.first->printDebug(
false);
579 dbgs() <<
"LiveInPressure "
580 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
581 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
'\n';
582 dbgs() <<
"LiveOutPressure "
583 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
584 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n\n";
585 dbgs() <<
"LiveIns:\n";
586 for (
unsigned Reg : LiveInRegs)
589 dbgs() <<
"\nLiveOuts:\n";
590 for (
unsigned Reg : LiveOutRegs)
594 dbgs() <<
"\nInstructions:\n";
595 for (
const SUnit* SU : SUnits)
598 dbgs() <<
"///////////////////////\n";
609 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator
B =
610 Blocks.find(BlockVariant);
611 if (
B == Blocks.end()) {
613 createBlocksForVariant(BlockVariant);
615 scheduleInsideBlocks();
617 Res.
Blocks = CurrentBlocks;
620 Blocks[BlockVariant] = Res;
630 return CurrentBlocks[Node2CurrentBlock[SU->
NodeNum]]->getID() ==
ID;
633void SIScheduleBlockCreator::colorHighLatenciesAlone() {
634 unsigned DAGSize = DAG->
SUnits.size();
636 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
639 CurrentColoring[SU->
NodeNum] = NextReservedID++;
646 for (
const auto &PredDep : SU.
Preds) {
647 if (PredDep.getSUnit() == &FromSU &&
654void SIScheduleBlockCreator::colorHighLatenciesGroups() {
655 unsigned DAGSize = DAG->
SUnits.size();
656 unsigned NumHighLatencies = 0;
658 int Color = NextReservedID;
660 std::set<unsigned> FormingGroup;
662 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
668 if (NumHighLatencies == 0)
671 if (NumHighLatencies <= 6)
673 else if (NumHighLatencies <= 12)
681 unsigned CompatibleGroup =
true;
682 int ProposedColor = Color;
683 std::vector<int> AdditionalElements;
695 for (
unsigned j : FormingGroup) {
697 std::vector<int> SubGraph;
710 else if (SubGraph.size() > 5) {
712 CompatibleGroup =
false;
717 for (
unsigned k : SubGraph) {
723 (CurrentColoring[k] != ProposedColor &&
724 CurrentColoring[k] != 0)) {
725 CompatibleGroup =
false;
731 CompatibleGroup =
false;
735 if (!CompatibleGroup)
739 CompatibleGroup =
false;
750 if (CompatibleGroup) {
751 FormingGroup.insert(SU.
NodeNum);
752 for (
unsigned j : AdditionalElements)
753 CurrentColoring[
j] = ProposedColor;
754 CurrentColoring[SU.
NodeNum] = ProposedColor;
760 if (!CompatibleGroup) {
761 FormingGroup.clear();
762 Color = ++NextReservedID;
763 ProposedColor = Color;
764 FormingGroup.insert(SU.
NodeNum);
765 CurrentColoring[SU.
NodeNum] = ProposedColor;
767 }
else if (Count == GroupSize) {
768 FormingGroup.clear();
769 Color = ++NextReservedID;
770 ProposedColor = Color;
777void SIScheduleBlockCreator::colorComputeReservedDependencies() {
778 unsigned DAGSize = DAG->
SUnits.size();
779 std::map<std::set<unsigned>,
unsigned> ColorCombinations;
781 CurrentTopDownReservedDependencyColoring.clear();
782 CurrentBottomUpReservedDependencyColoring.clear();
784 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
785 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
792 std::set<unsigned> SUColors;
795 if (CurrentColoring[SU->
NodeNum]) {
796 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
805 if (CurrentTopDownReservedDependencyColoring[Pred->
NodeNum] > 0)
806 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->
NodeNum]);
809 if (SUColors.empty())
812 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
813 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
816 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
817 ColorCombinations.find(SUColors);
818 if (Pos != ColorCombinations.end()) {
819 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] = Pos->second;
821 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
823 ColorCombinations[SUColors] = NextNonReservedID++;
828 ColorCombinations.clear();
834 std::set<unsigned> SUColors;
837 if (CurrentColoring[SU->
NodeNum]) {
838 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
847 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0)
848 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum]);
851 if (SUColors.empty())
854 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
855 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
858 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
859 ColorCombinations.find(SUColors);
860 if (Pos != ColorCombinations.end()) {
861 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] = Pos->second;
863 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
865 ColorCombinations[SUColors] = NextNonReservedID++;
871void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
872 std::map<std::pair<unsigned, unsigned>,
unsigned> ColorCombinations;
878 std::pair<unsigned, unsigned> SUColors;
881 if (CurrentColoring[SU.
NodeNum])
884 SUColors.first = CurrentTopDownReservedDependencyColoring[SU.
NodeNum];
885 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.
NodeNum];
887 std::map<std::pair<unsigned, unsigned>,
unsigned>::iterator Pos =
888 ColorCombinations.find(SUColors);
889 if (Pos != ColorCombinations.end()) {
890 CurrentColoring[SU.
NodeNum] = Pos->second;
892 CurrentColoring[SU.
NodeNum] = NextNonReservedID;
893 ColorCombinations[SUColors] = NextNonReservedID++;
898void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
899 unsigned DAGSize = DAG->
SUnits.size();
900 std::vector<int> PendingColoring = CurrentColoring;
903 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
904 CurrentTopDownReservedDependencyColoring.size() == DAGSize);
913 std::set<unsigned> SUColors;
914 std::set<unsigned> SUColorsPending;
916 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
919 if (CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] > 0 ||
920 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] > 0)
927 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0 ||
928 CurrentTopDownReservedDependencyColoring[Succ->
NodeNum] > 0)
929 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
930 SUColorsPending.insert(PendingColoring[Succ->
NodeNum]);
935 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
936 PendingColoring[SU->
NodeNum] = *SUColors.begin();
939 PendingColoring[SU->
NodeNum] = NextNonReservedID++;
941 CurrentColoring = PendingColoring;
945void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
946 unsigned DAGSize = DAG->
SUnits.size();
947 unsigned PreviousColor;
948 std::set<unsigned> SeenColors;
953 PreviousColor = CurrentColoring[0];
955 for (
unsigned i = 1, e = DAGSize; i !=
e; ++i) {
957 unsigned CurrentColor = CurrentColoring[i];
958 unsigned PreviousColorSave = PreviousColor;
961 if (CurrentColor != PreviousColor)
962 SeenColors.insert(PreviousColor);
963 PreviousColor = CurrentColor;
965 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
968 if (SeenColors.find(CurrentColor) == SeenColors.end())
971 if (PreviousColorSave != CurrentColor)
972 CurrentColoring[i] = NextNonReservedID++;
974 CurrentColoring[i] = CurrentColoring[i-1];
978void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
979 unsigned DAGSize = DAG->
SUnits.size();
983 std::set<unsigned> SUColors;
985 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
997 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
999 if (SUColors.size() == 1)
1000 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1004void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1005 unsigned DAGSize = DAG->
SUnits.size();
1009 std::set<unsigned> SUColors;
1011 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1018 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1020 if (SUColors.size() == 1)
1021 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1025void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1026 unsigned DAGSize = DAG->
SUnits.size();
1030 std::set<unsigned> SUColors;
1032 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1039 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1041 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1042 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1046void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1047 unsigned DAGSize = DAG->
SUnits.size();
1048 std::map<unsigned, unsigned> ColorCount;
1052 unsigned color = CurrentColoring[SU->
NodeNum];
1053 ++ColorCount[color];
1058 unsigned color = CurrentColoring[SU->
NodeNum];
1059 std::set<unsigned> SUColors;
1061 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1064 if (ColorCount[color] > 1)
1071 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1073 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1074 --ColorCount[color];
1075 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1076 ++ColorCount[*SUColors.begin()];
1081void SIScheduleBlockCreator::cutHugeBlocks() {
1085void SIScheduleBlockCreator::regroupNoUserInstructions() {
1086 unsigned DAGSize = DAG->
SUnits.size();
1087 int GroupID = NextNonReservedID++;
1091 bool hasSuccessor =
false;
1093 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1100 hasSuccessor =
true;
1103 CurrentColoring[SU->
NodeNum] = GroupID;
1107void SIScheduleBlockCreator::colorExports() {
1108 unsigned ExportColor = NextNonReservedID++;
1133 "SUnit unexpectedly not representing an instruction!");
1149 for (
unsigned j : ExpGroup)
1150 CurrentColoring[
j] = ExportColor;
1154 unsigned DAGSize = DAG->
SUnits.size();
1155 std::map<unsigned,unsigned> RealID;
1157 CurrentBlocks.clear();
1158 CurrentColoring.clear();
1159 CurrentColoring.resize(DAGSize, 0);
1160 Node2CurrentBlock.clear();
1166 NextNonReservedID = DAGSize + 1;
1171 colorHighLatenciesGroups();
1173 colorHighLatenciesAlone();
1174 colorComputeReservedDependencies();
1175 colorAccordingToReservedDependencies();
1176 colorEndsAccordingToDependencies();
1178 colorForceConsecutiveOrderInGroup();
1179 regroupNoUserInstructions();
1180 colorMergeConstantLoadsNextGroup();
1181 colorMergeIfPossibleNextGroupOnlyForReserved();
1185 Node2CurrentBlock.resize(DAGSize, -1);
1186 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1188 unsigned Color = CurrentColoring[SU->
NodeNum];
1189 if (RealID.find(Color) == RealID.end()) {
1190 int ID = CurrentBlocks.size();
1191 BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG,
this,
ID));
1192 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1195 CurrentBlocks[RealID[Color]]->addUnit(SU);
1196 Node2CurrentBlock[SU->
NodeNum] = RealID[Color];
1200 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1202 int SUID = Node2CurrentBlock[i];
1207 if (Node2CurrentBlock[Succ->
NodeNum] != SUID)
1208 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->
NodeNum]],
1215 if (Node2CurrentBlock[Pred->
NodeNum] != SUID)
1216 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->
NodeNum]]);
1222 Block->finalizeUnits();
1224 dbgs() <<
"Blocks created:\n\n";
1226 Block->printDebug(
true);
1236 for (;
I !=
End; ++
I) {
1237 if (!
I->isDebugInstr())
1243void SIScheduleBlockCreator::topologicalSort() {
1244 unsigned DAGSize = CurrentBlocks.size();
1245 std::vector<int> WorkList;
1249 WorkList.reserve(DAGSize);
1250 TopDownIndex2Block.resize(DAGSize);
1251 TopDownBlock2Index.resize(DAGSize);
1252 BottomUpIndex2Block.resize(DAGSize);
1254 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1256 unsigned Degree =
Block->getSuccs().size();
1257 TopDownBlock2Index[i] = Degree;
1259 WorkList.push_back(i);
1264 while (!WorkList.empty()) {
1265 int i = WorkList.back();
1267 WorkList.pop_back();
1268 TopDownBlock2Index[i] = --
Id;
1269 TopDownIndex2Block[
Id] = i;
1271 if (!--TopDownBlock2Index[Pred->getID()])
1272 WorkList.push_back(Pred->getID());
1278 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1281 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
1282 "Wrong Top Down topological sorting");
1287 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1288 TopDownIndex2Block.rend());
1291void SIScheduleBlockCreator::scheduleInsideBlocks() {
1292 unsigned DAGSize = CurrentBlocks.size();
1298 LLVM_DEBUG(
dbgs() <<
"First phase: Fast scheduling for Reg Liveness\n");
1299 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1301 Block->fastSchedule();
1309 std::vector<MachineBasicBlock::iterator> PosOld;
1310 std::vector<MachineBasicBlock::iterator> PosNew;
1311 PosOld.reserve(DAG->
SUnits.size());
1312 PosNew.reserve(DAG->
SUnits.size());
1314 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1315 int BlockIndice = TopDownIndex2Block[i];
1317 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1319 for (
SUnit* SU : SUs) {
1322 PosOld.push_back(Pos);
1323 if (&*CurrentTopFastSched ==
MI) {
1324 PosNew.push_back(Pos);
1325 CurrentTopFastSched =
nextIfDebug(++CurrentTopFastSched,
1337 PosNew.push_back(CurrentTopFastSched);
1346 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1348 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1349 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1354 for (
unsigned i = PosOld.size(), e = 0; i != e; --i) {
1368 Block->printDebug(
true);
1372void SIScheduleBlockCreator::fillStats() {
1373 unsigned DAGSize = CurrentBlocks.size();
1375 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1376 int BlockIndice = TopDownIndex2Block[i];
1378 if (
Block->getPreds().empty())
1383 if (Depth < Pred->
Depth + Pred->getCost())
1384 Depth = Pred->Depth + Pred->getCost();
1390 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1391 int BlockIndice = BottomUpIndex2Block[i];
1393 if (
Block->getSuccs().empty())
1396 unsigned Height = 0;
1397 for (
const auto &Succ :
Block->getSuccs())
1398 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1399 Block->Height = Height;
1409 DAG(DAG), Variant(Variant),
Blocks(BlocksStruct.
Blocks),
1410 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1411 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1423 LiveOutRegsNumUsages.resize(Blocks.size());
1425 for (
unsigned Reg :
Block->getInRegs()) {
1429 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1430 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1432 if (RegPos != PredOutRegs.end()) {
1444 ++LiveOutRegsNumUsages[PredID][Reg];
1448 LastPosHighLatencyParentScheduled.resize(
Blocks.size(), 0);
1449 BlockNumPredsLeft.resize(
Blocks.size());
1450 BlockNumSuccsLeft.resize(
Blocks.size());
1452 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1454 BlockNumPredsLeft[i] =
Block->getPreds().size();
1455 BlockNumSuccsLeft[i] =
Block->getSuccs().size();
1459 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1465 std::set<unsigned> InRegs = DAG->
getInRegs();
1466 addLiveRegs(InRegs);
1472 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1476 const std::set<unsigned> &OutRegs =
Block->getOutRegs();
1478 if (OutRegs.find(Reg) == OutRegs.end())
1481 ++LiveOutRegsNumUsages[
ID][Reg];
1489 for (
unsigned Reg :
Block->getInRegs()) {
1492 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1493 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1495 if (RegPos != PredOutRegs.end()) {
1502 ++LiveRegsConsumers[Reg];
1506 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1508 if (BlockNumPredsLeft[i] == 0) {
1509 ReadyBlocks.push_back(
Block);
1514 BlocksScheduled.push_back(
Block);
1515 blockScheduled(
Block);
1519 : BlocksScheduled) {
1524bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1525 SIBlockSchedCandidate &TryCand) {
1526 if (!Cand.isValid()) {
1533 Cand.LastPosHighLatParentScheduled, TryCand, Cand,
Latency))
1540 TryCand, Cand,
Depth))
1543 Cand.NumHighLatencySuccessors,
1549bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1550 SIBlockSchedCandidate &TryCand) {
1551 if (!Cand.isValid()) {
1560 Cand.NumSuccessors > 0,
1572 SIBlockSchedCandidate Cand;
1573 std::vector<SIScheduleBlock*>::iterator Best;
1575 if (ReadyBlocks.empty())
1579 VregCurrentUsage, SregCurrentUsage);
1580 if (VregCurrentUsage > maxVregUsage)
1581 maxVregUsage = VregCurrentUsage;
1582 if (SregCurrentUsage > maxSregUsage)
1583 maxSregUsage = SregCurrentUsage;
1586 : ReadyBlocks)
dbgs()
1587 <<
Block->getID() <<
' ';
1588 dbgs() <<
"\nCurrent Live:\n";
1593 dbgs() <<
"Current VGPRs: " << VregCurrentUsage <<
'\n';
1594 dbgs() <<
"Current SGPRs: " << SregCurrentUsage <<
'\n';);
1596 Cand.Block =
nullptr;
1597 for (std::vector<SIScheduleBlock*>::iterator
I = ReadyBlocks.begin(),
1598 E = ReadyBlocks.end();
I != E; ++
I) {
1599 SIBlockSchedCandidate TryCand;
1601 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1602 TryCand.VGPRUsageDiff =
1603 checkRegUsageImpact(TryCand.Block->getInRegs(),
1604 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1605 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1606 TryCand.NumHighLatencySuccessors =
1607 TryCand.Block->getNumHighLatencySuccessors();
1608 TryCand.LastPosHighLatParentScheduled =
1609 (
unsigned int) std::max<int> (0,
1610 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1611 LastPosWaitedHighLatency);
1612 TryCand.Height = TryCand.Block->Height;
1614 if (VregCurrentUsage > 120 ||
1616 if (!tryCandidateRegUsage(Cand, TryCand) &&
1618 tryCandidateLatency(Cand, TryCand);
1620 if (!tryCandidateLatency(Cand, TryCand))
1621 tryCandidateRegUsage(Cand, TryCand);
1623 if (TryCand.Reason !=
NoCand) {
1624 Cand.setBest(TryCand);
1626 LLVM_DEBUG(
dbgs() <<
"Best Current Choice: " << Cand.Block->getID() <<
' '
1632 dbgs() <<
"Is a block with high latency instruction: "
1633 << (Cand.IsHighLatency ?
"yes\n" :
"no\n");
1634 dbgs() <<
"Position of last high latency dependency: "
1635 << Cand.LastPosHighLatParentScheduled <<
'\n';
1636 dbgs() <<
"VGPRUsageDiff: " << Cand.VGPRUsageDiff <<
'\n';
1640 ReadyBlocks.erase(Best);
1646void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1649 if (!
Reg.isVirtual())
1652 (void) LiveRegs.insert(Reg);
1657 std::set<unsigned> &Regs) {
1658 for (
unsigned Reg : Regs) {
1660 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1661 assert (Pos != LiveRegs.end() &&
1662 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
1663 LiveRegsConsumers[Reg] >= 1);
1664 --LiveRegsConsumers[
Reg];
1665 if (LiveRegsConsumers[Reg] == 0)
1666 LiveRegs.erase(Pos);
1670void SIScheduleBlockScheduler::releaseBlockSuccs(
SIScheduleBlock *Parent) {
1672 if (--BlockNumPredsLeft[
Block.first->getID()] == 0)
1673 ReadyBlocks.push_back(
Block.first);
1677 LastPosHighLatencyParentScheduled[
Block.first->getID()] = NumBlockScheduled;
1683 addLiveRegs(
Block->getOutRegs());
1684 releaseBlockSuccs(
Block);
1685 for (
const auto &RegP : LiveOutRegsNumUsages[
Block->getID()]) {
1687 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1688 LiveRegsConsumers[RegP.first] == 0);
1689 LiveRegsConsumers[RegP.first] += RegP.second;
1691 if (LastPosHighLatencyParentScheduled[
Block->getID()] >
1692 (
unsigned)LastPosWaitedHighLatency)
1693 LastPosWaitedHighLatency =
1694 LastPosHighLatencyParentScheduled[
Block->getID()];
1695 ++NumBlockScheduled;
1699SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1700 std::set<unsigned> &OutRegs) {
1701 std::vector<int> DiffSetPressure;
1706 if (!
Reg.isVirtual())
1708 if (LiveRegsConsumers[Reg] > 1)
1711 for (; PSetI.
isValid(); ++PSetI) {
1712 DiffSetPressure[*PSetI] -= PSetI.
getWeight();
1718 if (!
Reg.isVirtual())
1721 for (; PSetI.
isValid(); ++PSetI) {
1722 DiffSetPressure[*PSetI] += PSetI.
getWeight();
1726 return DiffSetPressure;
1732SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1733 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1736 std::vector<SIScheduleBlock*> ScheduledBlocks;
1739 ScheduledBlocks =
Scheduler.getBlocks();
1742 std::vector<SUnit*>
SUs =
Block->getScheduledUnits();
1766void SIScheduleDAGMI::topologicalSort() {
1779void SIScheduleDAGMI::moveLowLatencies() {
1780 unsigned DAGSize =
SUnits.size();
1781 int LastLowLatencyUser = -1;
1782 int LastLowLatencyPos = -1;
1784 for (
unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1786 bool IsLowLatencyUser =
false;
1787 unsigned MinPos = 0;
1792 IsLowLatencyUser =
true;
1796 unsigned PredPos = ScheduledSUnitsInv[Pred->
NodeNum];
1797 if (PredPos >= MinPos)
1798 MinPos = PredPos + 1;
1802 unsigned BestPos = LastLowLatencyUser + 1;
1803 if ((
int)BestPos <= LastLowLatencyPos)
1804 BestPos = LastLowLatencyPos + 1;
1805 if (BestPos < MinPos)
1808 for (
unsigned u = i;
u > BestPos; --
u) {
1809 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1810 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1812 ScheduledSUnits[BestPos] = SU->
NodeNum;
1813 ScheduledSUnitsInv[SU->
NodeNum] = BestPos;
1815 LastLowLatencyPos = BestPos;
1816 if (IsLowLatencyUser)
1817 LastLowLatencyUser = BestPos;
1818 }
else if (IsLowLatencyUser) {
1819 LastLowLatencyUser = i;
1823 bool CopyForLowLat =
false;
1829 CopyForLowLat =
true;
1835 for (
unsigned u = i;
u > MinPos; --
u) {
1836 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1837 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1839 ScheduledSUnits[MinPos] = SU->
NodeNum;
1840 ScheduledSUnitsInv[SU->
NodeNum] = MinPos;
1847 for (
unsigned i = 0, e =
SUnits.size(); i != e; ++i) {
1848 SUnits[i].isScheduled =
false;
1849 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1850 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1851 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1852 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1857template<
typename _Iterator>
void
1859 unsigned &VgprUsage,
unsigned &SgprUsage) {
1862 for (_Iterator RegI =
First; RegI !=
End; ++RegI) {
1865 if (!
Reg.isVirtual())
1868 for (; PSetI.
isValid(); ++PSetI) {
1869 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1871 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1903 SUnitsLinksBackup =
SUnits;
1912 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1918 bool OffsetIsScalable;
1919 if (SITII->getMemOperandWithOffset(*SU->
getInstr(), BaseLatOp, OffLatReg,
1920 OffsetIsScalable,
TRI))
1945 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1946 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1966 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1967 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1973 ScheduledSUnits = Best.
SUs;
1974 ScheduledSUnitsInv.resize(
SUnits.size());
1976 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1977 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
1987 for (
unsigned I : ScheduledSUnits) {
2001 dbgs() <<
"*** Final schedule for "
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DenseMap< Block *, BlockRelaxAux > Blocks
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isDefBetween(unsigned Reg, SlotIndex First, SlotIndex Last, const MachineRegisterInfo *MRI, const LiveIntervals *LIS)
static const char * getReasonStr(SIScheduleCandReason Reason)
static bool hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU)
SI Machine Scheduler interface.
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineOperand class - Representation of each machine instruction operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
PSetIterator getPressureSets(Register RegUnit) const
Get an iterator over the pressure sets affected by the given physical or virtual register.
Iterate over the pressure sets affected by the given physical or virtual register.
unsigned getWeight() const
Track the current register pressure at some position in the instruction stream, and remember the high...
void setPos(MachineBasicBlock::const_iterator Pos)
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void advance()
Advance across the current instruction.
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Wrapper class representing virtual and physical registers.
@ Data
Regular data dependence (aka true-dependence).
bool isWeak() const
Tests if this a weak dependence.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
static bool isEXP(const MachineInstr &MI)
bool isHighLatencyDef(int Opc) const override
bool isLowLatencyInstruction(const MachineInstr &MI) const
bool isSUInBlock(SUnit *SU, unsigned ID)
SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
SIScheduleBlocks getBlocks(SISchedulerBlockCreatorVariant BlockVariant)
SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, SISchedulerBlockSchedulerVariant Variant, SIScheduleBlocks BlocksStruct)
ArrayRef< std::pair< SIScheduleBlock *, SIScheduleBlockLinkKind > > getSuccs() const
void addPred(SIScheduleBlock *Pred)
void printDebug(bool Full)
void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind)
void schedule(MachineBasicBlock::iterator BeginBlock, MachineBasicBlock::iterator EndBlock)
void addUnit(SUnit *SU)
Functions for Block construction.
bool isHighLatencyBlock()
void restoreSULinksLeft()
std::vector< int > BottomUpIndex2SU
std::vector< unsigned > IsHighLatencySU
std::vector< unsigned > LowLatencyOffset
std::vector< int > TopDownIndex2SU
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGTopologicalSort * GetTopo()
void fillVgprSgprCost(_Iterator First, _Iterator End, unsigned &VgprUsage, unsigned &SgprUsage)
MachineRegisterInfo * getMRI()
SIScheduleDAGMI(MachineSchedContext *C)
MachineBasicBlock::iterator getCurrentBottom()
std::vector< unsigned > IsLowLatencySU
MachineBasicBlock::iterator getCurrentTop()
std::set< unsigned > getInRegs()
MachineBasicBlock * getBB()
void initRPTracker(RegPressureTracker &RPTracker)
std::set< unsigned > getOutRegs()
~SIScheduleDAGMI() override
const TargetRegisterInfo * getTRI()
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
bool isScheduled
True once scheduled.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
void dumpNode(const SUnit &SU) const override
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
void dump() const override
RegPressureTracker TopRPTracker
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
std::vector< int > GetSubGraph(const SUnit &StartSU, const SUnit &TargetSU, bool &Success)
Returns an array of SUs that are both in the successor subtree of StartSU and in the predecessor subt...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
reverse_iterator rbegin()
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual unsigned getNumRegPressureSets() const =0
Get the number of dimensions of register pressure.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
static bool tryGreater(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
static bool tryLess(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > PrintDAGs
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
SISchedulerBlockSchedulerVariant
cl::opt< bool > ViewMISchedDAGs
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SISchedulerBlockCreatorVariant
@ LatenciesAlonePlusConsecutive
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto max_element(R &&Range)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
std::vector< unsigned > SUs
std::vector< int > TopDownIndex2Block
std::vector< SIScheduleBlock * > Blocks
std::vector< int > TopDownBlock2Index
SIScheduleCandReason Reason
void setRepeat(SIScheduleCandReason R)