22#define DEBUG_TYPE "machine-scheduler"
126 case NoCand:
return "NOCAND";
128 case Latency:
return "LATENCY";
130 case Depth:
return "DEPTH";
143 if (TryVal < CandVal) {
147 if (TryVal > CandVal) {
160 if (TryVal > CandVal) {
164 if (TryVal < CandVal) {
177 NodeNum2Index[SU->
NodeNum] = SUnits.size();
178 SUnits.push_back(SU);
182void SIScheduleBlock::traceCandidate(
const SISchedCandidate &Cand) {
189void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
190 SISchedCandidate &TryCand) {
192 if (!Cand.isValid()) {
197 if (Cand.SGPRUsage > 60 &&
218 Cand.HasLowLatencyNonWaitedParent,
226 if (TryCand.IsLowLatency &&
236 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
241SUnit* SIScheduleBlock::pickNode() {
242 SISchedCandidate TopCand;
244 for (
SUnit* SU : TopReadySUs) {
245 SISchedCandidate TryCand;
246 std::vector<unsigned> pressure;
247 std::vector<unsigned> MaxPressure;
251 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
252 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
255 TryCand.HasLowLatencyNonWaitedParent =
256 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
257 tryCandidateTopDown(TopCand, TryCand);
258 if (TryCand.Reason !=
NoCand)
259 TopCand.setBest(TryCand);
272 for (
SUnit* SU : SUnits) {
273 if (!SU->NumPredsLeft)
274 TopReadySUs.push_back(SU);
277 while (!TopReadySUs.empty()) {
278 SUnit *SU = TopReadySUs[0];
279 ScheduledSUnits.push_back(SU);
292 UI =
MRI->def_instr_begin(Reg),
293 UE =
MRI->def_instr_end(); UI != UE; ++UI) {
295 if (
MI->isDebugValue())
298 if (InstSlot >=
First && InstSlot <=
Last)
316 for (
SUnit* SU : ScheduledSUnits) {
317 RPTracker.setPos(SU->getInstr());
322 RPTracker.closeRegion();
325 TopRPTracker.
addLiveRegs(RPTracker.getPressure().LiveInRegs);
326 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
329 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
330 if (RegMaskPair.RegUnit.isVirtual())
331 LiveInRegs.insert(RegMaskPair.RegUnit);
356 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
358 if (
Reg.isVirtual() &&
362 LiveOutRegs.insert(Reg);
383 initRegPressure(BeginBlock, EndBlock);
390 for (
SUnit* SU : SUnits) {
391 if (!SU->NumPredsLeft)
392 TopReadySUs.push_back(SU);
395 while (!TopReadySUs.empty()) {
396 SUnit *SU = pickNode();
397 ScheduledSUnits.push_back(SU);
404 InternalAdditionalPressure.resize(TopPressure.
MaxSetPressure.size());
408 assert(SUnits.size() == ScheduledSUnits.size() &&
409 TopReadySUs.empty());
410 for (
SUnit* SU : SUnits) {
412 SU->NumPredsLeft == 0);
419void SIScheduleBlock::undoSchedule() {
420 for (
SUnit* SU : SUnits) {
421 SU->isScheduled =
false;
422 for (
SDep& Succ : SU->Succs) {
424 undoReleaseSucc(SU, &Succ);
427 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
428 ScheduledSUnits.clear();
432void SIScheduleBlock::undoReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
442void SIScheduleBlock::releaseSucc(
SUnit *SU,
SDep *SuccEdge) {
451 dbgs() <<
"*** Scheduling failed! ***\n";
453 dbgs() <<
" has been released too many times!\n";
462void SIScheduleBlock::releaseSuccessors(
SUnit *SU,
bool InOrOutBlock) {
472 releaseSucc(SU, &Succ);
474 TopReadySUs.push_back(SuccSU);
478void SIScheduleBlock::nodeScheduled(
SUnit *SU) {
481 std::vector<SUnit *>::iterator
I =
llvm::find(TopReadySUs, SU);
482 if (
I == TopReadySUs.end()) {
483 dbgs() <<
"Data Structure Bug in SI Scheduler\n";
486 TopReadySUs.erase(
I);
488 releaseSuccessors(SU,
true);
491 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->
NodeNum]])
492 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
496 std::map<unsigned, unsigned>::iterator
I =
498 if (
I != NodeNum2Index.end())
499 HasLowLatencyNonWaitedParent[
I->second] = 1;
507 for (
SUnit* SU : SUnits) {
508 releaseSuccessors(SU,
false);
510 HighLatencyBlock =
true;
512 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
517 unsigned PredID = Pred->
getID();
521 if (PredID ==
P->getID())
524 Preds.push_back(Pred);
529 return PredID == S.first->getID();
531 "Loop in the Block Graph!");
536 unsigned SuccID = Succ->
getID();
539 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
540 if (SuccID == S.first->getID()) {
548 ++NumHighLatencySuccessors;
549 Succs.emplace_back(Succ, Kind);
553 "Loop in the Block Graph!");
558 dbgs() <<
"Block (" <<
ID <<
")\n";
562 dbgs() <<
"\nContains High Latency Instruction: "
563 << HighLatencyBlock <<
'\n';
564 dbgs() <<
"\nDepends On:\n";
566 P->printDebug(
false);
569 dbgs() <<
"\nSuccessors:\n";
570 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
572 dbgs() <<
"(Data Dep) ";
573 S.first->printDebug(
false);
577 dbgs() <<
"LiveInPressure "
578 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
579 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
'\n';
580 dbgs() <<
"LiveOutPressure "
581 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
582 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n\n";
583 dbgs() <<
"LiveIns:\n";
584 for (
unsigned Reg : LiveInRegs)
587 dbgs() <<
"\nLiveOuts:\n";
588 for (
unsigned Reg : LiveOutRegs)
592 dbgs() <<
"\nInstructions:\n";
593 for (
const SUnit* SU : SUnits)
596 dbgs() <<
"///////////////////////\n";
607 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator
B =
608 Blocks.find(BlockVariant);
609 if (
B == Blocks.end()) {
611 createBlocksForVariant(BlockVariant);
613 scheduleInsideBlocks();
615 Res.
Blocks = CurrentBlocks;
618 Blocks[BlockVariant] = Res;
627 return CurrentBlocks[Node2CurrentBlock[SU->
NodeNum]]->getID() ==
ID;
630void SIScheduleBlockCreator::colorHighLatenciesAlone() {
631 unsigned DAGSize = DAG->
SUnits.size();
633 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
636 CurrentColoring[SU->
NodeNum] = NextReservedID++;
643 for (
const auto &PredDep : SU.
Preds) {
644 if (PredDep.getSUnit() == &FromSU &&
651void SIScheduleBlockCreator::colorHighLatenciesGroups() {
652 unsigned DAGSize = DAG->
SUnits.size();
653 unsigned NumHighLatencies = 0;
655 int Color = NextReservedID;
657 std::set<unsigned> FormingGroup;
659 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
665 if (NumHighLatencies == 0)
668 if (NumHighLatencies <= 6)
670 else if (NumHighLatencies <= 12)
678 unsigned CompatibleGroup =
true;
679 int ProposedColor = Color;
680 std::vector<int> AdditionalElements;
692 for (
unsigned j : FormingGroup) {
694 std::vector<int> SubGraph;
707 if (SubGraph.size() > 5) {
709 CompatibleGroup =
false;
713 for (
unsigned k : SubGraph) {
719 CurrentColoring[k] != 0)) {
720 CompatibleGroup =
false;
726 CompatibleGroup =
false;
730 if (!CompatibleGroup)
734 CompatibleGroup =
false;
744 if (CompatibleGroup) {
745 FormingGroup.insert(SU.
NodeNum);
746 for (
unsigned j : AdditionalElements)
747 CurrentColoring[
j] = ProposedColor;
748 CurrentColoring[SU.
NodeNum] = ProposedColor;
754 if (!CompatibleGroup) {
755 FormingGroup.clear();
756 Color = ++NextReservedID;
757 ProposedColor = Color;
758 FormingGroup.insert(SU.
NodeNum);
759 CurrentColoring[SU.
NodeNum] = ProposedColor;
761 }
else if (Count == GroupSize) {
762 FormingGroup.clear();
763 Color = ++NextReservedID;
764 ProposedColor = Color;
771void SIScheduleBlockCreator::colorComputeReservedDependencies() {
772 unsigned DAGSize = DAG->
SUnits.size();
773 std::map<std::set<unsigned>,
unsigned> ColorCombinations;
775 CurrentTopDownReservedDependencyColoring.clear();
776 CurrentBottomUpReservedDependencyColoring.clear();
778 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
779 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
786 std::set<unsigned> SUColors;
789 if (CurrentColoring[SU->
NodeNum]) {
790 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
799 if (CurrentTopDownReservedDependencyColoring[Pred->
NodeNum] > 0)
800 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->
NodeNum]);
803 if (SUColors.empty())
806 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
807 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
810 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
811 ColorCombinations.find(SUColors);
812 if (Pos != ColorCombinations.end()) {
813 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] = Pos->second;
815 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
817 ColorCombinations[SUColors] = NextNonReservedID++;
822 ColorCombinations.clear();
828 std::set<unsigned> SUColors;
831 if (CurrentColoring[SU->
NodeNum]) {
832 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
841 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0)
842 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum]);
845 if (SUColors.empty())
848 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
849 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
852 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
853 ColorCombinations.find(SUColors);
854 if (Pos != ColorCombinations.end()) {
855 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] = Pos->second;
857 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
859 ColorCombinations[SUColors] = NextNonReservedID++;
865void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
866 std::map<std::pair<unsigned, unsigned>,
unsigned> ColorCombinations;
872 std::pair<unsigned, unsigned> SUColors;
875 if (CurrentColoring[SU.
NodeNum])
878 SUColors.first = CurrentTopDownReservedDependencyColoring[SU.
NodeNum];
879 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.
NodeNum];
882 ColorCombinations.try_emplace(SUColors, NextNonReservedID);
883 CurrentColoring[SU.
NodeNum] = Pos->second;
889void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
890 unsigned DAGSize = DAG->
SUnits.size();
891 std::vector<int> PendingColoring = CurrentColoring;
894 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
895 CurrentTopDownReservedDependencyColoring.size() == DAGSize);
904 std::set<unsigned> SUColors;
905 std::set<unsigned> SUColorsPending;
907 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
910 if (CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] > 0 ||
911 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] > 0)
918 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0 ||
919 CurrentTopDownReservedDependencyColoring[Succ->
NodeNum] > 0)
920 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
921 SUColorsPending.insert(PendingColoring[Succ->
NodeNum]);
926 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
927 PendingColoring[SU->
NodeNum] = *SUColors.begin();
930 PendingColoring[SU->
NodeNum] = NextNonReservedID++;
932 CurrentColoring = PendingColoring;
936void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
937 unsigned DAGSize = DAG->
SUnits.size();
938 unsigned PreviousColor;
939 std::set<unsigned> SeenColors;
944 PreviousColor = CurrentColoring[0];
946 for (
unsigned i = 1, e = DAGSize; i !=
e; ++i) {
948 unsigned CurrentColor = CurrentColoring[i];
949 unsigned PreviousColorSave = PreviousColor;
952 if (CurrentColor != PreviousColor)
953 SeenColors.insert(PreviousColor);
954 PreviousColor = CurrentColor;
956 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
959 if (SeenColors.find(CurrentColor) == SeenColors.end())
962 if (PreviousColorSave != CurrentColor)
963 CurrentColoring[i] = NextNonReservedID++;
965 CurrentColoring[i] = CurrentColoring[i-1];
969void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
970 unsigned DAGSize = DAG->
SUnits.size();
974 std::set<unsigned> SUColors;
976 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
988 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
990 if (SUColors.size() == 1)
991 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
995void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
996 unsigned DAGSize = DAG->
SUnits.size();
1000 std::set<unsigned> SUColors;
1002 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1009 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1011 if (SUColors.size() == 1)
1012 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1016void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1017 unsigned DAGSize = DAG->
SUnits.size();
1021 std::set<unsigned> SUColors;
1023 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1030 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1032 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1033 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1037void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1038 unsigned DAGSize = DAG->
SUnits.size();
1039 std::map<unsigned, unsigned> ColorCount;
1043 unsigned color = CurrentColoring[SU->
NodeNum];
1044 ++ColorCount[color];
1049 unsigned color = CurrentColoring[SU->
NodeNum];
1050 std::set<unsigned> SUColors;
1052 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1055 if (ColorCount[color] > 1)
1062 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1064 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1065 --ColorCount[color];
1066 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1067 ++ColorCount[*SUColors.begin()];
1072void SIScheduleBlockCreator::cutHugeBlocks() {
1076void SIScheduleBlockCreator::regroupNoUserInstructions() {
1077 unsigned DAGSize = DAG->
SUnits.size();
1078 int GroupID = NextNonReservedID++;
1082 bool hasSuccessor =
false;
1084 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1091 hasSuccessor =
true;
1094 CurrentColoring[SU->
NodeNum] = GroupID;
1098void SIScheduleBlockCreator::colorExports() {
1099 unsigned ExportColor = NextNonReservedID++;
1124 "SUnit unexpectedly not representing an instruction!");
1140 for (
unsigned j : ExpGroup)
1141 CurrentColoring[
j] = ExportColor;
1145 unsigned DAGSize = DAG->
SUnits.size();
1146 std::map<unsigned,unsigned> RealID;
1148 CurrentBlocks.clear();
1149 CurrentColoring.clear();
1150 CurrentColoring.resize(DAGSize, 0);
1151 Node2CurrentBlock.clear();
1157 NextNonReservedID = DAGSize + 1;
1162 colorHighLatenciesGroups();
1164 colorHighLatenciesAlone();
1165 colorComputeReservedDependencies();
1166 colorAccordingToReservedDependencies();
1167 colorEndsAccordingToDependencies();
1169 colorForceConsecutiveOrderInGroup();
1170 regroupNoUserInstructions();
1171 colorMergeConstantLoadsNextGroup();
1172 colorMergeIfPossibleNextGroupOnlyForReserved();
1176 Node2CurrentBlock.resize(DAGSize, -1);
1177 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1179 unsigned Color = CurrentColoring[SU->
NodeNum];
1180 if (RealID.find(Color) == RealID.end()) {
1181 int ID = CurrentBlocks.size();
1182 BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG,
this,
ID));
1183 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1186 CurrentBlocks[RealID[Color]]->addUnit(SU);
1187 Node2CurrentBlock[SU->
NodeNum] = RealID[Color];
1191 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1193 int SUID = Node2CurrentBlock[i];
1198 if (Node2CurrentBlock[Succ->
NodeNum] != SUID)
1199 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->
NodeNum]],
1206 if (Node2CurrentBlock[Pred->
NodeNum] != SUID)
1207 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->
NodeNum]]);
1213 Block->finalizeUnits();
1215 dbgs() <<
"Blocks created:\n\n";
1217 Block->printDebug(
true);
1227 for (;
I !=
End; ++
I) {
1228 if (!
I->isDebugInstr())
1234void SIScheduleBlockCreator::topologicalSort() {
1235 unsigned DAGSize = CurrentBlocks.size();
1236 std::vector<int> WorkList;
1240 WorkList.reserve(DAGSize);
1241 TopDownIndex2Block.resize(DAGSize);
1242 TopDownBlock2Index.resize(DAGSize);
1243 BottomUpIndex2Block.resize(DAGSize);
1245 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1247 unsigned Degree =
Block->getSuccs().size();
1248 TopDownBlock2Index[i] = Degree;
1250 WorkList.push_back(i);
1255 while (!WorkList.empty()) {
1256 int i = WorkList.back();
1258 WorkList.pop_back();
1259 TopDownBlock2Index[i] = --
Id;
1260 TopDownIndex2Block[
Id] = i;
1262 if (!--TopDownBlock2Index[Pred->getID()])
1263 WorkList.push_back(Pred->getID());
1269 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1272 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
1273 "Wrong Top Down topological sorting");
1278 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1279 TopDownIndex2Block.rend());
1282void SIScheduleBlockCreator::scheduleInsideBlocks() {
1283 unsigned DAGSize = CurrentBlocks.size();
1289 LLVM_DEBUG(
dbgs() <<
"First phase: Fast scheduling for Reg Liveness\n");
1290 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1292 Block->fastSchedule();
1300 std::vector<MachineBasicBlock::iterator> PosOld;
1301 std::vector<MachineBasicBlock::iterator> PosNew;
1302 PosOld.reserve(DAG->
SUnits.size());
1303 PosNew.reserve(DAG->
SUnits.size());
1305 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1306 int BlockIndice = TopDownIndex2Block[i];
1308 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1310 for (
SUnit* SU : SUs) {
1313 PosOld.push_back(Pos);
1314 if (&*CurrentTopFastSched ==
MI) {
1315 PosNew.push_back(Pos);
1316 CurrentTopFastSched =
nextIfDebug(++CurrentTopFastSched,
1328 PosNew.push_back(CurrentTopFastSched);
1337 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1339 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1340 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1345 for (
unsigned i = PosOld.size(), e = 0; i != e; --i) {
1359 Block->printDebug(
true);
1363void SIScheduleBlockCreator::fillStats() {
1364 unsigned DAGSize = CurrentBlocks.size();
1366 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1367 int BlockIndice = TopDownIndex2Block[i];
1369 if (
Block->getPreds().empty())
1374 if (Depth < Pred->
Depth + Pred->getCost())
1375 Depth = Pred->Depth + Pred->getCost();
1381 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1382 int BlockIndice = BottomUpIndex2Block[i];
1384 if (
Block->getSuccs().empty())
1387 unsigned Height = 0;
1388 for (
const auto &Succ :
Block->getSuccs())
1389 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1390 Block->Height = Height;
1400 DAG(DAG), Variant(Variant),
Blocks(BlocksStruct.
Blocks),
1401 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1402 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1414 LiveOutRegsNumUsages.resize(Blocks.size());
1416 for (
unsigned Reg :
Block->getInRegs()) {
1420 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1421 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1423 if (RegPos != PredOutRegs.end()) {
1435 ++LiveOutRegsNumUsages[PredID][Reg];
1439 LastPosHighLatencyParentScheduled.resize(
Blocks.size(), 0);
1440 BlockNumPredsLeft.resize(
Blocks.size());
1441 BlockNumSuccsLeft.resize(
Blocks.size());
1443 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1445 BlockNumPredsLeft[i] =
Block->getPreds().size();
1446 BlockNumSuccsLeft[i] =
Block->getSuccs().size();
1450 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1456 std::set<unsigned> InRegs = DAG->
getInRegs();
1457 addLiveRegs(InRegs);
1463 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1467 const std::set<unsigned> &OutRegs =
Block->getOutRegs();
1469 if (OutRegs.find(Reg) == OutRegs.end())
1472 ++LiveOutRegsNumUsages[
ID][Reg];
1480 for (
unsigned Reg :
Block->getInRegs()) {
1483 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1484 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1486 if (RegPos != PredOutRegs.end()) {
1493 ++LiveRegsConsumers[Reg];
1497 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1499 if (BlockNumPredsLeft[i] == 0) {
1500 ReadyBlocks.push_back(
Block);
1505 BlocksScheduled.push_back(
Block);
1506 blockScheduled(
Block);
1510 : BlocksScheduled) {
1515bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1516 SIBlockSchedCandidate &TryCand) {
1517 if (!Cand.isValid()) {
1524 Cand.LastPosHighLatParentScheduled, TryCand, Cand,
Latency))
1531 TryCand, Cand,
Depth))
1534 Cand.NumHighLatencySuccessors,
1540bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1541 SIBlockSchedCandidate &TryCand) {
1542 if (!Cand.isValid()) {
1551 Cand.NumSuccessors > 0,
1563 SIBlockSchedCandidate Cand;
1564 std::vector<SIScheduleBlock*>::iterator Best;
1566 if (ReadyBlocks.empty())
1570 VregCurrentUsage, SregCurrentUsage);
1571 if (VregCurrentUsage > maxVregUsage)
1572 maxVregUsage = VregCurrentUsage;
1573 if (SregCurrentUsage > maxSregUsage)
1574 maxSregUsage = SregCurrentUsage;
1577 : ReadyBlocks)
dbgs()
1578 <<
Block->getID() <<
' ';
1579 dbgs() <<
"\nCurrent Live:\n";
1584 dbgs() <<
"Current VGPRs: " << VregCurrentUsage <<
'\n';
1585 dbgs() <<
"Current SGPRs: " << SregCurrentUsage <<
'\n';);
1587 Cand.Block =
nullptr;
1588 for (std::vector<SIScheduleBlock*>::iterator
I = ReadyBlocks.begin(),
1589 E = ReadyBlocks.end();
I != E; ++
I) {
1590 SIBlockSchedCandidate TryCand;
1592 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1593 TryCand.VGPRUsageDiff =
1594 checkRegUsageImpact(TryCand.Block->getInRegs(),
1595 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1596 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1597 TryCand.NumHighLatencySuccessors =
1598 TryCand.Block->getNumHighLatencySuccessors();
1599 TryCand.LastPosHighLatParentScheduled =
1600 (
unsigned int) std::max<int> (0,
1601 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1602 LastPosWaitedHighLatency);
1603 TryCand.Height = TryCand.Block->Height;
1605 if (VregCurrentUsage > 120 ||
1607 if (!tryCandidateRegUsage(Cand, TryCand) &&
1609 tryCandidateLatency(Cand, TryCand);
1611 if (!tryCandidateLatency(Cand, TryCand))
1612 tryCandidateRegUsage(Cand, TryCand);
1614 if (TryCand.Reason !=
NoCand) {
1615 Cand.setBest(TryCand);
1617 LLVM_DEBUG(
dbgs() <<
"Best Current Choice: " << Cand.Block->getID() <<
' '
1623 dbgs() <<
"Is a block with high latency instruction: "
1624 << (Cand.IsHighLatency ?
"yes\n" :
"no\n");
1625 dbgs() <<
"Position of last high latency dependency: "
1626 << Cand.LastPosHighLatParentScheduled <<
'\n';
1627 dbgs() <<
"VGPRUsageDiff: " << Cand.VGPRUsageDiff <<
'\n';
1631 ReadyBlocks.erase(Best);
1637void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1640 if (!
Reg.isVirtual())
1643 (void) LiveRegs.insert(Reg);
1648 std::set<unsigned> &Regs) {
1649 for (
unsigned Reg : Regs) {
1651 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1652 assert (Pos != LiveRegs.end() &&
1653 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
1654 LiveRegsConsumers[Reg] >= 1);
1655 --LiveRegsConsumers[
Reg];
1656 if (LiveRegsConsumers[Reg] == 0)
1657 LiveRegs.erase(Pos);
1661void SIScheduleBlockScheduler::releaseBlockSuccs(
SIScheduleBlock *Parent) {
1663 if (--BlockNumPredsLeft[
Block.first->getID()] == 0)
1664 ReadyBlocks.push_back(
Block.first);
1668 LastPosHighLatencyParentScheduled[
Block.first->getID()] = NumBlockScheduled;
1674 addLiveRegs(
Block->getOutRegs());
1675 releaseBlockSuccs(
Block);
1676 for (
const auto &RegP : LiveOutRegsNumUsages[
Block->getID()]) {
1678 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1679 LiveRegsConsumers[RegP.first] == 0);
1680 LiveRegsConsumers[RegP.first] += RegP.second;
1682 if (LastPosHighLatencyParentScheduled[
Block->getID()] >
1683 (
unsigned)LastPosWaitedHighLatency)
1684 LastPosWaitedHighLatency =
1685 LastPosHighLatencyParentScheduled[
Block->getID()];
1686 ++NumBlockScheduled;
1690SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1691 std::set<unsigned> &OutRegs) {
1692 std::vector<int> DiffSetPressure;
1697 if (!
Reg.isVirtual())
1699 if (LiveRegsConsumers[Reg] > 1)
1702 for (; PSetI.
isValid(); ++PSetI) {
1703 DiffSetPressure[*PSetI] -= PSetI.
getWeight();
1709 if (!
Reg.isVirtual())
1712 for (; PSetI.
isValid(); ++PSetI) {
1713 DiffSetPressure[*PSetI] += PSetI.
getWeight();
1717 return DiffSetPressure;
1723SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1724 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1727 std::vector<SIScheduleBlock*> ScheduledBlocks;
1730 ScheduledBlocks =
Scheduler.getBlocks();
1733 std::vector<SUnit*>
SUs =
Block->getScheduledUnits();
1757void SIScheduleDAGMI::topologicalSort() {
1770void SIScheduleDAGMI::moveLowLatencies() {
1771 unsigned DAGSize =
SUnits.size();
1772 int LastLowLatencyUser = -1;
1773 int LastLowLatencyPos = -1;
1775 for (
unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1777 bool IsLowLatencyUser =
false;
1778 unsigned MinPos = 0;
1783 IsLowLatencyUser =
true;
1787 unsigned PredPos = ScheduledSUnitsInv[Pred->
NodeNum];
1788 if (PredPos >= MinPos)
1789 MinPos = PredPos + 1;
1793 unsigned BestPos = LastLowLatencyUser + 1;
1794 if ((
int)BestPos <= LastLowLatencyPos)
1795 BestPos = LastLowLatencyPos + 1;
1796 if (BestPos < MinPos)
1799 for (
unsigned u = i;
u > BestPos; --
u) {
1800 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1801 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1803 ScheduledSUnits[BestPos] = SU->
NodeNum;
1804 ScheduledSUnitsInv[SU->
NodeNum] = BestPos;
1806 LastLowLatencyPos = BestPos;
1807 if (IsLowLatencyUser)
1808 LastLowLatencyUser = BestPos;
1809 }
else if (IsLowLatencyUser) {
1810 LastLowLatencyUser = i;
1814 bool CopyForLowLat =
false;
1820 CopyForLowLat =
true;
1826 for (
unsigned u = i;
u > MinPos; --
u) {
1827 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1828 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1830 ScheduledSUnits[MinPos] = SU->
NodeNum;
1831 ScheduledSUnitsInv[SU->
NodeNum] = MinPos;
1838 for (
unsigned i = 0, e =
SUnits.size(); i != e; ++i) {
1839 SUnits[i].isScheduled =
false;
1840 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1841 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1842 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1843 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1848template<
typename _Iterator>
void
1850 unsigned &VgprUsage,
unsigned &SgprUsage) {
1853 for (_Iterator RegI =
First; RegI !=
End; ++RegI) {
1856 if (!
Reg.isVirtual())
1859 for (; PSetI.
isValid(); ++PSetI) {
1860 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1862 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1894 SUnitsLinksBackup =
SUnits;
1903 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1909 bool OffsetIsScalable;
1910 if (SITII->getMemOperandWithOffset(*SU->
getInstr(), BaseLatOp, OffLatReg,
1911 OffsetIsScalable,
TRI))
1936 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1937 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1957 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1958 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1964 ScheduledSUnits = Best.
SUs;
1965 ScheduledSUnitsInv.resize(
SUnits.size());
1967 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1968 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
1978 for (
unsigned I : ScheduledSUnits) {
1992 dbgs() <<
"*** Final schedule for "
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DenseMap< Block *, BlockRelaxAux > Blocks
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isDefBetween(unsigned Reg, SlotIndex First, SlotIndex Last, const MachineRegisterInfo *MRI, const LiveIntervals *LIS)
static const char * getReasonStr(SIScheduleCandReason Reason)
static bool hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU)
SI Machine Scheduler interface.
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineOperand class - Representation of each machine instruction operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
PSetIterator getPressureSets(Register RegUnit) const
Get an iterator over the pressure sets affected by the given physical or virtual register.
Iterate over the pressure sets affected by the given physical or virtual register.
unsigned getWeight() const
Track the current register pressure at some position in the instruction stream, and remember the high...
void setPos(MachineBasicBlock::const_iterator Pos)
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void advance()
Advance across the current instruction.
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Wrapper class representing virtual and physical registers.
@ Data
Regular data dependence (aka true-dependence).
bool isWeak() const
Tests if this a weak dependence.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
static bool isEXP(const MachineInstr &MI)
bool isHighLatencyDef(int Opc) const override
bool isLowLatencyInstruction(const MachineInstr &MI) const
bool isSUInBlock(SUnit *SU, unsigned ID)
SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
SIScheduleBlocks getBlocks(SISchedulerBlockCreatorVariant BlockVariant)
SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, SISchedulerBlockSchedulerVariant Variant, SIScheduleBlocks BlocksStruct)
ArrayRef< std::pair< SIScheduleBlock *, SIScheduleBlockLinkKind > > getSuccs() const
void addPred(SIScheduleBlock *Pred)
void printDebug(bool Full)
void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind)
void schedule(MachineBasicBlock::iterator BeginBlock, MachineBasicBlock::iterator EndBlock)
void addUnit(SUnit *SU)
Functions for Block construction.
bool isHighLatencyBlock()
void restoreSULinksLeft()
std::vector< int > BottomUpIndex2SU
std::vector< unsigned > IsHighLatencySU
std::vector< unsigned > LowLatencyOffset
std::vector< int > TopDownIndex2SU
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGTopologicalSort * GetTopo()
void fillVgprSgprCost(_Iterator First, _Iterator End, unsigned &VgprUsage, unsigned &SgprUsage)
MachineRegisterInfo * getMRI()
SIScheduleDAGMI(MachineSchedContext *C)
MachineBasicBlock::iterator getCurrentBottom()
std::vector< unsigned > IsLowLatencySU
MachineBasicBlock::iterator getCurrentTop()
std::set< unsigned > getInRegs()
MachineBasicBlock * getBB()
void initRPTracker(RegPressureTracker &RPTracker)
std::set< unsigned > getOutRegs()
~SIScheduleDAGMI() override
const TargetRegisterInfo * getTRI()
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
bool isScheduled
True once scheduled.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
void dumpNode(const SUnit &SU) const override
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
void dump() const override
RegPressureTracker TopRPTracker
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
std::vector< int > GetSubGraph(const SUnit &StartSU, const SUnit &TargetSU, bool &Success)
Returns an array of SUs that are both in the successor subtree of StartSU and in the predecessor subt...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
reverse_iterator rbegin()
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual unsigned getNumRegPressureSets() const =0
Get the number of dimensions of register pressure.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
static bool tryGreater(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
static bool tryLess(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > PrintDAGs
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
SISchedulerBlockSchedulerVariant
cl::opt< bool > ViewMISchedDAGs
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SISchedulerBlockCreatorVariant
@ LatenciesAlonePlusConsecutive
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
std::vector< unsigned > SUs
std::vector< int > TopDownIndex2Block
std::vector< SIScheduleBlock * > Blocks
std::vector< int > TopDownBlock2Index
SIScheduleCandReason Reason
void setRepeat(SIScheduleCandReason R)