22#define DEBUG_TYPE "machine-scheduler"
126 case NoCand:
return "NOCAND";
128 case Latency:
return "LATENCY";
130 case Depth:
return "DEPTH";
143 if (TryVal < CandVal) {
147 if (TryVal > CandVal) {
160 if (TryVal > CandVal) {
164 if (TryVal < CandVal) {
177 NodeNum2Index[SU->
NodeNum] = SUnits.size();
178 SUnits.push_back(SU);
182void SIScheduleBlock::traceCandidate(
const SISchedCandidate &Cand) {
189void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
190 SISchedCandidate &TryCand) {
192 if (!Cand.isValid()) {
197 if (Cand.SGPRUsage > 60 &&
218 Cand.HasLowLatencyNonWaitedParent,
226 if (TryCand.IsLowLatency &&
236 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
241SUnit* SIScheduleBlock::pickNode() {
242 SISchedCandidate TopCand;
244 for (
SUnit* SU : TopReadySUs) {
245 SISchedCandidate TryCand;
246 std::vector<unsigned> pressure;
247 std::vector<unsigned> MaxPressure;
251 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
252 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
255 TryCand.HasLowLatencyNonWaitedParent =
256 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
257 tryCandidateTopDown(TopCand, TryCand);
258 if (TryCand.Reason !=
NoCand)
259 TopCand.setBest(TryCand);
272 for (
SUnit* SU : SUnits) {
273 if (!SU->NumPredsLeft)
274 TopReadySUs.push_back(SU);
277 while (!TopReadySUs.empty()) {
278 SUnit *SU = TopReadySUs[0];
279 ScheduledSUnits.push_back(SU);
292 UI =
MRI->def_instr_begin(Reg),
293 UE =
MRI->def_instr_end(); UI != UE; ++UI) {
295 if (
MI->isDebugValue())
298 if (InstSlot >=
First && InstSlot <=
Last)
316 for (
SUnit* SU : ScheduledSUnits) {
317 RPTracker.setPos(SU->getInstr());
322 RPTracker.closeRegion();
325 TopRPTracker.
addLiveRegs(RPTracker.getPressure().LiveInRegs);
326 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
329 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
330 if (RegMaskPair.RegUnit.isVirtual())
331 LiveInRegs.insert(RegMaskPair.RegUnit);
356 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
358 if (
Reg.isVirtual() &&
362 LiveOutRegs.insert(Reg);
383 initRegPressure(BeginBlock, EndBlock);
390 for (
SUnit* SU : SUnits) {
391 if (!SU->NumPredsLeft)
392 TopReadySUs.push_back(SU);
395 while (!TopReadySUs.empty()) {
396 SUnit *SU = pickNode();
397 ScheduledSUnits.push_back(SU);
404 InternalAdditionalPressure.resize(TopPressure.
MaxSetPressure.size());
408 assert(SUnits.size() == ScheduledSUnits.size() &&
409 TopReadySUs.empty());
410 for (
SUnit* SU : SUnits) {
412 SU->NumPredsLeft == 0);
419void SIScheduleBlock::undoSchedule() {
420 for (
SUnit* SU : SUnits) {
421 SU->isScheduled =
false;
422 for (
SDep& Succ : SU->Succs) {
424 undoReleaseSucc(SU, &Succ);
427 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
428 ScheduledSUnits.clear();
432void SIScheduleBlock::undoReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
442void SIScheduleBlock::releaseSucc(
SUnit *SU,
SDep *SuccEdge) {
451 dbgs() <<
"*** Scheduling failed! ***\n";
453 dbgs() <<
" has been released too many times!\n";
462void SIScheduleBlock::releaseSuccessors(
SUnit *SU,
bool InOrOutBlock) {
472 releaseSucc(SU, &Succ);
474 TopReadySUs.push_back(SuccSU);
478void SIScheduleBlock::nodeScheduled(
SUnit *SU) {
481 std::vector<SUnit *>::iterator
I =
llvm::find(TopReadySUs, SU);
482 if (
I == TopReadySUs.end()) {
483 dbgs() <<
"Data Structure Bug in SI Scheduler\n";
486 TopReadySUs.erase(
I);
488 releaseSuccessors(SU,
true);
491 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->
NodeNum]])
492 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
496 std::map<unsigned, unsigned>::iterator
I =
498 if (
I != NodeNum2Index.end())
499 HasLowLatencyNonWaitedParent[
I->second] = 1;
507 for (
SUnit* SU : SUnits) {
508 releaseSuccessors(SU,
false);
510 HighLatencyBlock =
true;
512 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
517 unsigned PredID = Pred->
getID();
521 if (PredID ==
P->getID())
524 Preds.push_back(Pred);
529 return PredID == S.first->getID();
531 "Loop in the Block Graph!");
536 unsigned SuccID = Succ->
getID();
539 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
540 if (SuccID == S.first->getID()) {
548 ++NumHighLatencySuccessors;
549 Succs.push_back(std::pair(Succ, Kind));
553 "Loop in the Block Graph!");
558 dbgs() <<
"Block (" <<
ID <<
")\n";
562 dbgs() <<
"\nContains High Latency Instruction: "
563 << HighLatencyBlock <<
'\n';
564 dbgs() <<
"\nDepends On:\n";
566 P->printDebug(
false);
569 dbgs() <<
"\nSuccessors:\n";
570 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
572 dbgs() <<
"(Data Dep) ";
573 S.first->printDebug(
false);
577 dbgs() <<
"LiveInPressure "
578 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
579 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
'\n';
580 dbgs() <<
"LiveOutPressure "
581 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
582 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n\n";
583 dbgs() <<
"LiveIns:\n";
584 for (
unsigned Reg : LiveInRegs)
587 dbgs() <<
"\nLiveOuts:\n";
588 for (
unsigned Reg : LiveOutRegs)
592 dbgs() <<
"\nInstructions:\n";
593 for (
const SUnit* SU : SUnits)
596 dbgs() <<
"///////////////////////\n";
607 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator
B =
608 Blocks.find(BlockVariant);
609 if (
B == Blocks.end()) {
611 createBlocksForVariant(BlockVariant);
613 scheduleInsideBlocks();
615 Res.
Blocks = CurrentBlocks;
618 Blocks[BlockVariant] = Res;
628 return CurrentBlocks[Node2CurrentBlock[SU->
NodeNum]]->getID() ==
ID;
631void SIScheduleBlockCreator::colorHighLatenciesAlone() {
632 unsigned DAGSize = DAG->
SUnits.size();
634 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
637 CurrentColoring[SU->
NodeNum] = NextReservedID++;
644 for (
const auto &PredDep : SU.
Preds) {
645 if (PredDep.getSUnit() == &FromSU &&
652void SIScheduleBlockCreator::colorHighLatenciesGroups() {
653 unsigned DAGSize = DAG->
SUnits.size();
654 unsigned NumHighLatencies = 0;
656 int Color = NextReservedID;
658 std::set<unsigned> FormingGroup;
660 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
666 if (NumHighLatencies == 0)
669 if (NumHighLatencies <= 6)
671 else if (NumHighLatencies <= 12)
679 unsigned CompatibleGroup =
true;
680 int ProposedColor = Color;
681 std::vector<int> AdditionalElements;
693 for (
unsigned j : FormingGroup) {
695 std::vector<int> SubGraph;
708 else if (SubGraph.size() > 5) {
710 CompatibleGroup =
false;
715 for (
unsigned k : SubGraph) {
721 (CurrentColoring[k] != ProposedColor &&
722 CurrentColoring[k] != 0)) {
723 CompatibleGroup =
false;
729 CompatibleGroup =
false;
733 if (!CompatibleGroup)
737 CompatibleGroup =
false;
748 if (CompatibleGroup) {
749 FormingGroup.insert(SU.
NodeNum);
750 for (
unsigned j : AdditionalElements)
751 CurrentColoring[
j] = ProposedColor;
752 CurrentColoring[SU.
NodeNum] = ProposedColor;
758 if (!CompatibleGroup) {
759 FormingGroup.clear();
760 Color = ++NextReservedID;
761 ProposedColor = Color;
762 FormingGroup.insert(SU.
NodeNum);
763 CurrentColoring[SU.
NodeNum] = ProposedColor;
765 }
else if (Count == GroupSize) {
766 FormingGroup.clear();
767 Color = ++NextReservedID;
768 ProposedColor = Color;
775void SIScheduleBlockCreator::colorComputeReservedDependencies() {
776 unsigned DAGSize = DAG->
SUnits.size();
777 std::map<std::set<unsigned>,
unsigned> ColorCombinations;
779 CurrentTopDownReservedDependencyColoring.clear();
780 CurrentBottomUpReservedDependencyColoring.clear();
782 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
783 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
790 std::set<unsigned> SUColors;
793 if (CurrentColoring[SU->
NodeNum]) {
794 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
803 if (CurrentTopDownReservedDependencyColoring[Pred->
NodeNum] > 0)
804 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->
NodeNum]);
807 if (SUColors.empty())
810 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
811 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
814 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
815 ColorCombinations.find(SUColors);
816 if (Pos != ColorCombinations.end()) {
817 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] = Pos->second;
819 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
821 ColorCombinations[SUColors] = NextNonReservedID++;
826 ColorCombinations.clear();
832 std::set<unsigned> SUColors;
835 if (CurrentColoring[SU->
NodeNum]) {
836 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
845 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0)
846 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum]);
849 if (SUColors.empty())
852 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
853 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
856 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
857 ColorCombinations.find(SUColors);
858 if (Pos != ColorCombinations.end()) {
859 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] = Pos->second;
861 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
863 ColorCombinations[SUColors] = NextNonReservedID++;
869void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
870 std::map<std::pair<unsigned, unsigned>,
unsigned> ColorCombinations;
876 std::pair<unsigned, unsigned> SUColors;
879 if (CurrentColoring[SU.
NodeNum])
882 SUColors.first = CurrentTopDownReservedDependencyColoring[SU.
NodeNum];
883 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.
NodeNum];
885 std::map<std::pair<unsigned, unsigned>,
unsigned>::iterator Pos =
886 ColorCombinations.find(SUColors);
887 if (Pos != ColorCombinations.end()) {
888 CurrentColoring[SU.
NodeNum] = Pos->second;
890 CurrentColoring[SU.
NodeNum] = NextNonReservedID;
891 ColorCombinations[SUColors] = NextNonReservedID++;
896void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
897 unsigned DAGSize = DAG->
SUnits.size();
898 std::vector<int> PendingColoring = CurrentColoring;
901 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
902 CurrentTopDownReservedDependencyColoring.size() == DAGSize);
911 std::set<unsigned> SUColors;
912 std::set<unsigned> SUColorsPending;
914 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
917 if (CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] > 0 ||
918 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] > 0)
925 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0 ||
926 CurrentTopDownReservedDependencyColoring[Succ->
NodeNum] > 0)
927 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
928 SUColorsPending.insert(PendingColoring[Succ->
NodeNum]);
933 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
934 PendingColoring[SU->
NodeNum] = *SUColors.begin();
937 PendingColoring[SU->
NodeNum] = NextNonReservedID++;
939 CurrentColoring = PendingColoring;
943void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
944 unsigned DAGSize = DAG->
SUnits.size();
945 unsigned PreviousColor;
946 std::set<unsigned> SeenColors;
951 PreviousColor = CurrentColoring[0];
953 for (
unsigned i = 1, e = DAGSize; i !=
e; ++i) {
955 unsigned CurrentColor = CurrentColoring[i];
956 unsigned PreviousColorSave = PreviousColor;
959 if (CurrentColor != PreviousColor)
960 SeenColors.insert(PreviousColor);
961 PreviousColor = CurrentColor;
963 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
966 if (SeenColors.find(CurrentColor) == SeenColors.end())
969 if (PreviousColorSave != CurrentColor)
970 CurrentColoring[i] = NextNonReservedID++;
972 CurrentColoring[i] = CurrentColoring[i-1];
976void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
977 unsigned DAGSize = DAG->
SUnits.size();
981 std::set<unsigned> SUColors;
983 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
995 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
997 if (SUColors.size() == 1)
998 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1002void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1003 unsigned DAGSize = DAG->
SUnits.size();
1007 std::set<unsigned> SUColors;
1009 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1016 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1018 if (SUColors.size() == 1)
1019 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1023void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1024 unsigned DAGSize = DAG->
SUnits.size();
1028 std::set<unsigned> SUColors;
1030 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1037 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1039 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1040 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1044void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1045 unsigned DAGSize = DAG->
SUnits.size();
1046 std::map<unsigned, unsigned> ColorCount;
1050 unsigned color = CurrentColoring[SU->
NodeNum];
1051 ++ColorCount[color];
1056 unsigned color = CurrentColoring[SU->
NodeNum];
1057 std::set<unsigned> SUColors;
1059 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1062 if (ColorCount[color] > 1)
1069 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1071 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1072 --ColorCount[color];
1073 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1074 ++ColorCount[*SUColors.begin()];
1079void SIScheduleBlockCreator::cutHugeBlocks() {
1083void SIScheduleBlockCreator::regroupNoUserInstructions() {
1084 unsigned DAGSize = DAG->
SUnits.size();
1085 int GroupID = NextNonReservedID++;
1089 bool hasSuccessor =
false;
1091 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1098 hasSuccessor =
true;
1101 CurrentColoring[SU->
NodeNum] = GroupID;
1105void SIScheduleBlockCreator::colorExports() {
1106 unsigned ExportColor = NextNonReservedID++;
1131 "SUnit unexpectedly not representing an instruction!");
1147 for (
unsigned j : ExpGroup)
1148 CurrentColoring[
j] = ExportColor;
1152 unsigned DAGSize = DAG->
SUnits.size();
1153 std::map<unsigned,unsigned> RealID;
1155 CurrentBlocks.clear();
1156 CurrentColoring.clear();
1157 CurrentColoring.resize(DAGSize, 0);
1158 Node2CurrentBlock.clear();
1164 NextNonReservedID = DAGSize + 1;
1169 colorHighLatenciesGroups();
1171 colorHighLatenciesAlone();
1172 colorComputeReservedDependencies();
1173 colorAccordingToReservedDependencies();
1174 colorEndsAccordingToDependencies();
1176 colorForceConsecutiveOrderInGroup();
1177 regroupNoUserInstructions();
1178 colorMergeConstantLoadsNextGroup();
1179 colorMergeIfPossibleNextGroupOnlyForReserved();
1183 Node2CurrentBlock.resize(DAGSize, -1);
1184 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1186 unsigned Color = CurrentColoring[SU->
NodeNum];
1187 if (RealID.find(Color) == RealID.end()) {
1188 int ID = CurrentBlocks.size();
1189 BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG,
this,
ID));
1190 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1193 CurrentBlocks[RealID[Color]]->addUnit(SU);
1194 Node2CurrentBlock[SU->
NodeNum] = RealID[Color];
1198 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1200 int SUID = Node2CurrentBlock[i];
1205 if (Node2CurrentBlock[Succ->
NodeNum] != SUID)
1206 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->
NodeNum]],
1213 if (Node2CurrentBlock[Pred->
NodeNum] != SUID)
1214 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->
NodeNum]]);
1220 Block->finalizeUnits();
1222 dbgs() <<
"Blocks created:\n\n";
1224 Block->printDebug(
true);
1234 for (;
I !=
End; ++
I) {
1235 if (!
I->isDebugInstr())
1241void SIScheduleBlockCreator::topologicalSort() {
1242 unsigned DAGSize = CurrentBlocks.size();
1243 std::vector<int> WorkList;
1247 WorkList.reserve(DAGSize);
1248 TopDownIndex2Block.resize(DAGSize);
1249 TopDownBlock2Index.resize(DAGSize);
1250 BottomUpIndex2Block.resize(DAGSize);
1252 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1254 unsigned Degree =
Block->getSuccs().size();
1255 TopDownBlock2Index[i] = Degree;
1257 WorkList.push_back(i);
1262 while (!WorkList.empty()) {
1263 int i = WorkList.back();
1265 WorkList.pop_back();
1266 TopDownBlock2Index[i] = --
Id;
1267 TopDownIndex2Block[
Id] = i;
1269 if (!--TopDownBlock2Index[Pred->getID()])
1270 WorkList.push_back(Pred->getID());
1276 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1279 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
1280 "Wrong Top Down topological sorting");
1285 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1286 TopDownIndex2Block.rend());
1289void SIScheduleBlockCreator::scheduleInsideBlocks() {
1290 unsigned DAGSize = CurrentBlocks.size();
1296 LLVM_DEBUG(
dbgs() <<
"First phase: Fast scheduling for Reg Liveness\n");
1297 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1299 Block->fastSchedule();
1307 std::vector<MachineBasicBlock::iterator> PosOld;
1308 std::vector<MachineBasicBlock::iterator> PosNew;
1309 PosOld.reserve(DAG->
SUnits.size());
1310 PosNew.reserve(DAG->
SUnits.size());
1312 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1313 int BlockIndice = TopDownIndex2Block[i];
1315 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1317 for (
SUnit* SU : SUs) {
1320 PosOld.push_back(Pos);
1321 if (&*CurrentTopFastSched ==
MI) {
1322 PosNew.push_back(Pos);
1323 CurrentTopFastSched =
nextIfDebug(++CurrentTopFastSched,
1335 PosNew.push_back(CurrentTopFastSched);
1344 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1346 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1347 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1352 for (
unsigned i = PosOld.size(), e = 0; i != e; --i) {
1366 Block->printDebug(
true);
1370void SIScheduleBlockCreator::fillStats() {
1371 unsigned DAGSize = CurrentBlocks.size();
1373 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1374 int BlockIndice = TopDownIndex2Block[i];
1376 if (
Block->getPreds().empty())
1381 if (Depth < Pred->
Depth + Pred->getCost())
1382 Depth = Pred->Depth + Pred->getCost();
1388 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1389 int BlockIndice = BottomUpIndex2Block[i];
1391 if (
Block->getSuccs().empty())
1394 unsigned Height = 0;
1395 for (
const auto &Succ :
Block->getSuccs())
1396 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1397 Block->Height = Height;
1407 DAG(DAG), Variant(Variant),
Blocks(BlocksStruct.
Blocks),
1408 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1409 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1421 LiveOutRegsNumUsages.resize(Blocks.size());
1423 for (
unsigned Reg :
Block->getInRegs()) {
1427 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1428 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1430 if (RegPos != PredOutRegs.end()) {
1442 ++LiveOutRegsNumUsages[PredID][Reg];
1446 LastPosHighLatencyParentScheduled.resize(
Blocks.size(), 0);
1447 BlockNumPredsLeft.resize(
Blocks.size());
1448 BlockNumSuccsLeft.resize(
Blocks.size());
1450 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1452 BlockNumPredsLeft[i] =
Block->getPreds().size();
1453 BlockNumSuccsLeft[i] =
Block->getSuccs().size();
1457 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1463 std::set<unsigned> InRegs = DAG->
getInRegs();
1464 addLiveRegs(InRegs);
1470 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1474 const std::set<unsigned> &OutRegs =
Block->getOutRegs();
1476 if (OutRegs.find(Reg) == OutRegs.end())
1479 ++LiveOutRegsNumUsages[
ID][Reg];
1487 for (
unsigned Reg :
Block->getInRegs()) {
1490 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1491 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1493 if (RegPos != PredOutRegs.end()) {
1500 ++LiveRegsConsumers[Reg];
1504 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1506 if (BlockNumPredsLeft[i] == 0) {
1507 ReadyBlocks.push_back(
Block);
1512 BlocksScheduled.push_back(
Block);
1513 blockScheduled(
Block);
1517 : BlocksScheduled) {
1522bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1523 SIBlockSchedCandidate &TryCand) {
1524 if (!Cand.isValid()) {
1531 Cand.LastPosHighLatParentScheduled, TryCand, Cand,
Latency))
1538 TryCand, Cand,
Depth))
1541 Cand.NumHighLatencySuccessors,
1547bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1548 SIBlockSchedCandidate &TryCand) {
1549 if (!Cand.isValid()) {
1558 Cand.NumSuccessors > 0,
1570 SIBlockSchedCandidate Cand;
1571 std::vector<SIScheduleBlock*>::iterator Best;
1573 if (ReadyBlocks.empty())
1577 VregCurrentUsage, SregCurrentUsage);
1578 if (VregCurrentUsage > maxVregUsage)
1579 maxVregUsage = VregCurrentUsage;
1580 if (SregCurrentUsage > maxSregUsage)
1581 maxSregUsage = SregCurrentUsage;
1584 : ReadyBlocks)
dbgs()
1585 <<
Block->getID() <<
' ';
1586 dbgs() <<
"\nCurrent Live:\n";
1591 dbgs() <<
"Current VGPRs: " << VregCurrentUsage <<
'\n';
1592 dbgs() <<
"Current SGPRs: " << SregCurrentUsage <<
'\n';);
1594 Cand.Block =
nullptr;
1595 for (std::vector<SIScheduleBlock*>::iterator
I = ReadyBlocks.begin(),
1596 E = ReadyBlocks.end();
I != E; ++
I) {
1597 SIBlockSchedCandidate TryCand;
1599 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1600 TryCand.VGPRUsageDiff =
1601 checkRegUsageImpact(TryCand.Block->getInRegs(),
1602 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1603 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1604 TryCand.NumHighLatencySuccessors =
1605 TryCand.Block->getNumHighLatencySuccessors();
1606 TryCand.LastPosHighLatParentScheduled =
1607 (
unsigned int) std::max<int> (0,
1608 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1609 LastPosWaitedHighLatency);
1610 TryCand.Height = TryCand.Block->Height;
1612 if (VregCurrentUsage > 120 ||
1614 if (!tryCandidateRegUsage(Cand, TryCand) &&
1616 tryCandidateLatency(Cand, TryCand);
1618 if (!tryCandidateLatency(Cand, TryCand))
1619 tryCandidateRegUsage(Cand, TryCand);
1621 if (TryCand.Reason !=
NoCand) {
1622 Cand.setBest(TryCand);
1624 LLVM_DEBUG(
dbgs() <<
"Best Current Choice: " << Cand.Block->getID() <<
' '
1630 dbgs() <<
"Is a block with high latency instruction: "
1631 << (Cand.IsHighLatency ?
"yes\n" :
"no\n");
1632 dbgs() <<
"Position of last high latency dependency: "
1633 << Cand.LastPosHighLatParentScheduled <<
'\n';
1634 dbgs() <<
"VGPRUsageDiff: " << Cand.VGPRUsageDiff <<
'\n';
1638 ReadyBlocks.erase(Best);
1644void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1647 if (!
Reg.isVirtual())
1650 (void) LiveRegs.insert(Reg);
1655 std::set<unsigned> &Regs) {
1656 for (
unsigned Reg : Regs) {
1658 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1659 assert (Pos != LiveRegs.end() &&
1660 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
1661 LiveRegsConsumers[Reg] >= 1);
1662 --LiveRegsConsumers[
Reg];
1663 if (LiveRegsConsumers[Reg] == 0)
1664 LiveRegs.erase(Pos);
1668void SIScheduleBlockScheduler::releaseBlockSuccs(
SIScheduleBlock *Parent) {
1670 if (--BlockNumPredsLeft[
Block.first->getID()] == 0)
1671 ReadyBlocks.push_back(
Block.first);
1675 LastPosHighLatencyParentScheduled[
Block.first->getID()] = NumBlockScheduled;
1681 addLiveRegs(
Block->getOutRegs());
1682 releaseBlockSuccs(
Block);
1683 for (
const auto &RegP : LiveOutRegsNumUsages[
Block->getID()]) {
1685 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1686 LiveRegsConsumers[RegP.first] == 0);
1687 LiveRegsConsumers[RegP.first] += RegP.second;
1689 if (LastPosHighLatencyParentScheduled[
Block->getID()] >
1690 (
unsigned)LastPosWaitedHighLatency)
1691 LastPosWaitedHighLatency =
1692 LastPosHighLatencyParentScheduled[
Block->getID()];
1693 ++NumBlockScheduled;
1697SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1698 std::set<unsigned> &OutRegs) {
1699 std::vector<int> DiffSetPressure;
1704 if (!
Reg.isVirtual())
1706 if (LiveRegsConsumers[Reg] > 1)
1709 for (; PSetI.
isValid(); ++PSetI) {
1710 DiffSetPressure[*PSetI] -= PSetI.
getWeight();
1716 if (!
Reg.isVirtual())
1719 for (; PSetI.
isValid(); ++PSetI) {
1720 DiffSetPressure[*PSetI] += PSetI.
getWeight();
1724 return DiffSetPressure;
1730SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1731 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1734 std::vector<SIScheduleBlock*> ScheduledBlocks;
1737 ScheduledBlocks =
Scheduler.getBlocks();
1740 std::vector<SUnit*>
SUs =
Block->getScheduledUnits();
1764void SIScheduleDAGMI::topologicalSort() {
1777void SIScheduleDAGMI::moveLowLatencies() {
1778 unsigned DAGSize =
SUnits.size();
1779 int LastLowLatencyUser = -1;
1780 int LastLowLatencyPos = -1;
1782 for (
unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1784 bool IsLowLatencyUser =
false;
1785 unsigned MinPos = 0;
1790 IsLowLatencyUser =
true;
1794 unsigned PredPos = ScheduledSUnitsInv[Pred->
NodeNum];
1795 if (PredPos >= MinPos)
1796 MinPos = PredPos + 1;
1800 unsigned BestPos = LastLowLatencyUser + 1;
1801 if ((
int)BestPos <= LastLowLatencyPos)
1802 BestPos = LastLowLatencyPos + 1;
1803 if (BestPos < MinPos)
1806 for (
unsigned u = i;
u > BestPos; --
u) {
1807 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1808 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1810 ScheduledSUnits[BestPos] = SU->
NodeNum;
1811 ScheduledSUnitsInv[SU->
NodeNum] = BestPos;
1813 LastLowLatencyPos = BestPos;
1814 if (IsLowLatencyUser)
1815 LastLowLatencyUser = BestPos;
1816 }
else if (IsLowLatencyUser) {
1817 LastLowLatencyUser = i;
1821 bool CopyForLowLat =
false;
1827 CopyForLowLat =
true;
1833 for (
unsigned u = i;
u > MinPos; --
u) {
1834 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1835 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1837 ScheduledSUnits[MinPos] = SU->
NodeNum;
1838 ScheduledSUnitsInv[SU->
NodeNum] = MinPos;
1845 for (
unsigned i = 0, e =
SUnits.size(); i != e; ++i) {
1846 SUnits[i].isScheduled =
false;
1847 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1848 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1849 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1850 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1855template<
typename _Iterator>
void
1857 unsigned &VgprUsage,
unsigned &SgprUsage) {
1860 for (_Iterator RegI =
First; RegI !=
End; ++RegI) {
1863 if (!
Reg.isVirtual())
1866 for (; PSetI.
isValid(); ++PSetI) {
1867 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1869 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1901 SUnitsLinksBackup =
SUnits;
1910 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1916 bool OffsetIsScalable;
1917 if (SITII->getMemOperandWithOffset(*SU->
getInstr(), BaseLatOp, OffLatReg,
1918 OffsetIsScalable,
TRI))
1943 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1944 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1964 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1965 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1971 ScheduledSUnits = Best.
SUs;
1972 ScheduledSUnitsInv.resize(
SUnits.size());
1974 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1975 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
1985 for (
unsigned I : ScheduledSUnits) {
1999 dbgs() <<
"*** Final schedule for "
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DenseMap< Block *, BlockRelaxAux > Blocks
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isDefBetween(unsigned Reg, SlotIndex First, SlotIndex Last, const MachineRegisterInfo *MRI, const LiveIntervals *LIS)
static const char * getReasonStr(SIScheduleCandReason Reason)
static bool hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU)
SI Machine Scheduler interface.
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineOperand class - Representation of each machine instruction operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
PSetIterator getPressureSets(Register RegUnit) const
Get an iterator over the pressure sets affected by the given physical or virtual register.
Iterate over the pressure sets affected by the given physical or virtual register.
unsigned getWeight() const
Track the current register pressure at some position in the instruction stream, and remember the high...
void setPos(MachineBasicBlock::const_iterator Pos)
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void advance()
Advance across the current instruction.
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Wrapper class representing virtual and physical registers.
@ Data
Regular data dependence (aka true-dependence).
bool isWeak() const
Tests if this a weak dependence.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
static bool isEXP(const MachineInstr &MI)
bool isHighLatencyDef(int Opc) const override
bool isLowLatencyInstruction(const MachineInstr &MI) const
bool isSUInBlock(SUnit *SU, unsigned ID)
SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
SIScheduleBlocks getBlocks(SISchedulerBlockCreatorVariant BlockVariant)
SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, SISchedulerBlockSchedulerVariant Variant, SIScheduleBlocks BlocksStruct)
ArrayRef< std::pair< SIScheduleBlock *, SIScheduleBlockLinkKind > > getSuccs() const
void addPred(SIScheduleBlock *Pred)
void printDebug(bool Full)
void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind)
void schedule(MachineBasicBlock::iterator BeginBlock, MachineBasicBlock::iterator EndBlock)
void addUnit(SUnit *SU)
Functions for Block construction.
bool isHighLatencyBlock()
void restoreSULinksLeft()
std::vector< int > BottomUpIndex2SU
std::vector< unsigned > IsHighLatencySU
std::vector< unsigned > LowLatencyOffset
std::vector< int > TopDownIndex2SU
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGTopologicalSort * GetTopo()
void fillVgprSgprCost(_Iterator First, _Iterator End, unsigned &VgprUsage, unsigned &SgprUsage)
MachineRegisterInfo * getMRI()
SIScheduleDAGMI(MachineSchedContext *C)
MachineBasicBlock::iterator getCurrentBottom()
std::vector< unsigned > IsLowLatencySU
MachineBasicBlock::iterator getCurrentTop()
std::set< unsigned > getInRegs()
MachineBasicBlock * getBB()
void initRPTracker(RegPressureTracker &RPTracker)
std::set< unsigned > getOutRegs()
~SIScheduleDAGMI() override
const TargetRegisterInfo * getTRI()
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
bool isScheduled
True once scheduled.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
void dumpNode(const SUnit &SU) const override
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
void dump() const override
RegPressureTracker TopRPTracker
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
std::vector< int > GetSubGraph(const SUnit &StartSU, const SUnit &TargetSU, bool &Success)
Returns an array of SUs that are both in the successor subtree of StartSU and in the predecessor subt...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
reverse_iterator rbegin()
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual unsigned getNumRegPressureSets() const =0
Get the number of dimensions of register pressure.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
static bool tryGreater(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
static bool tryLess(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > PrintDAGs
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
SISchedulerBlockSchedulerVariant
cl::opt< bool > ViewMISchedDAGs
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SISchedulerBlockCreatorVariant
@ LatenciesAlonePlusConsecutive
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto max_element(R &&Range)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
std::vector< unsigned > SUs
std::vector< int > TopDownIndex2Block
std::vector< SIScheduleBlock * > Blocks
std::vector< int > TopDownBlock2Index
SIScheduleCandReason Reason
void setRepeat(SIScheduleCandReason R)