41 #define DEBUG_TYPE "misched"
145 case NoCand:
return "NOCAND";
147 case Latency:
return "LATENCY";
149 case Depth:
return "DEPTH";
161 if (TryVal < CandVal) {
165 if (TryVal > CandVal) {
178 if (TryVal > CandVal) {
182 if (TryVal < CandVal) {
194 NodeNum2Index[SU->
NodeNum] = SUnits.size();
195 SUnits.push_back(SU);
199 void SIScheduleBlock::traceCandidate(
const SISchedCandidate &Cand) {
206 void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
207 SISchedCandidate &TryCand) {
209 if (!Cand.isValid()) {
214 if (Cand.SGPRUsage > 60 &&
233 if (
tryLess(TryCand.HasLowLatencyNonWaitedParent,
234 Cand.HasLowLatencyNonWaitedParent,
238 if (
tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
242 if (TryCand.IsLowLatency &&
243 tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
247 if (
tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, TryCand, Cand,
RegUsage))
251 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
256 SUnit* SIScheduleBlock::pickNode() {
257 SISchedCandidate TopCand;
259 for (
SUnit* SU : TopReadySUs) {
260 SISchedCandidate TryCand;
261 std::vector<unsigned> pressure;
262 std::vector<unsigned> MaxPressure;
270 TryCand.HasLowLatencyNonWaitedParent =
271 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
272 tryCandidateTopDown(TopCand, TryCand);
273 if (TryCand.Reason !=
NoCand)
274 TopCand.setBest(TryCand);
287 for (
SUnit* SU : SUnits) {
288 if (!SU->NumPredsLeft)
289 TopReadySUs.push_back(SU);
292 while (!TopReadySUs.empty()) {
293 SUnit *SU = TopReadySUs[0];
294 ScheduledSUnits.push_back(SU);
313 if (InstSlot >= First && InstSlot <= Last)
331 for (
SUnit* SU : ScheduledSUnits) {
332 RPTracker.setPos(SU->getInstr());
337 RPTracker.closeRegion();
340 TopRPTracker.
addLiveRegs(RPTracker.getPressure().LiveInRegs);
341 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
344 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
346 LiveInRegs.insert(RegMaskPair.RegUnit);
371 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
372 unsigned Reg = RegMaskPair.RegUnit;
377 LiveOutRegs.insert(Reg);
398 initRegPressure(BeginBlock, EndBlock);
405 for (
SUnit* SU : SUnits) {
406 if (!SU->NumPredsLeft)
407 TopReadySUs.push_back(SU);
410 while (!TopReadySUs.empty()) {
411 SUnit *SU = pickNode();
412 ScheduledSUnits.push_back(SU);
419 InternalAdditionnalPressure.resize(TopPressure.
MaxSetPressure.size());
423 assert(SUnits.size() == ScheduledSUnits.size() &&
424 TopReadySUs.empty());
425 for (
SUnit* SU : SUnits) {
427 SU->NumPredsLeft == 0);
434 void SIScheduleBlock::undoSchedule() {
435 for (
SUnit* SU : SUnits) {
436 SU->isScheduled =
false;
437 for (
SDep& Succ : SU->Succs) {
439 undoReleaseSucc(SU, &Succ);
442 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
443 ScheduledSUnits.clear();
447 void SIScheduleBlock::undoReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
457 void SIScheduleBlock::releaseSucc(
SUnit *SU,
SDep *SuccEdge) {
466 dbgs() <<
"*** Scheduling failed! ***\n";
468 dbgs() <<
" has been released too many times!\n";
477 void SIScheduleBlock::releaseSuccessors(
SUnit *SU,
bool InOrOutBlock) {
487 releaseSucc(SU, &Succ);
489 TopReadySUs.push_back(SuccSU);
493 void SIScheduleBlock::nodeScheduled(
SUnit *SU) {
496 std::vector<SUnit *>::iterator
I =
llvm::find(TopReadySUs, SU);
497 if (I == TopReadySUs.end()) {
498 dbgs() <<
"Data Structure Bug in SI Scheduler\n";
501 TopReadySUs.erase(I);
503 releaseSuccessors(SU,
true);
506 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->
NodeNum]])
507 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
511 std::map<unsigned, unsigned>::iterator I =
513 if (I != NodeNum2Index.end())
514 HasLowLatencyNonWaitedParent[I->second] = 1;
522 for (
SUnit* SU : SUnits) {
523 releaseSuccessors(SU,
false);
525 HighLatencyBlock =
true;
527 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
532 unsigned PredID = Pred->
getID();
536 if (PredID ==
P->getID())
539 Preds.push_back(Pred);
543 "Loop in the Block Graph!");
547 unsigned SuccID = Succ->
getID();
551 if (SuccID == S->getID())
555 ++NumHighLatencySuccessors;
556 Succs.push_back(Succ);
559 "Loop in the Block Graph!");
564 dbgs() <<
"Block (" << ID <<
")\n";
568 dbgs() <<
"\nContains High Latency Instruction: "
569 << HighLatencyBlock <<
'\n';
570 dbgs() <<
"\nDepends On:\n";
572 P->printDebug(
false);
575 dbgs() <<
"\nSuccessors:\n";
577 S->printDebug(
false);
585 dbgs() <<
"LiveIns:\n";
586 for (
unsigned Reg : LiveInRegs)
589 dbgs() <<
"\nLiveOuts:\n";
590 for (
unsigned Reg : LiveOutRegs)
594 dbgs() <<
"\nInstructions:\n";
596 for (
SUnit* SU : SUnits) {
600 for (
SUnit* SU : SUnits) {
605 dbgs() <<
"///////////////////////\n";
619 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator
B =
620 Blocks.find(BlockVariant);
621 if (B == Blocks.end()) {
623 createBlocksForVariant(BlockVariant);
625 scheduleInsideBlocks();
627 Res.
Blocks = CurrentBlocks;
630 Blocks[BlockVariant] = Res;
640 return CurrentBlocks[Node2CurrentBlock[SU->
NodeNum]]->getID() ==
ID;
643 void SIScheduleBlockCreator::colorHighLatenciesAlone() {
644 unsigned DAGSize = DAG->
SUnits.size();
646 for (
unsigned i = 0, e = DAGSize;
i != e; ++
i) {
649 CurrentColoring[SU->
NodeNum] = NextReservedID++;
654 void SIScheduleBlockCreator::colorHighLatenciesGroups() {
655 unsigned DAGSize = DAG->
SUnits.size();
656 unsigned NumHighLatencies = 0;
658 unsigned Color = NextReservedID;
660 std::set<unsigned> FormingGroup;
662 for (
unsigned i = 0, e = DAGSize;
i != e; ++
i) {
668 if (NumHighLatencies == 0)
671 if (NumHighLatencies <= 6)
673 else if (NumHighLatencies <= 12)
678 for (
unsigned i = 0, e = DAGSize;
i != e; ++
i) {
681 unsigned CompatibleGroup =
true;
682 unsigned ProposedColor =
Color;
683 for (
unsigned j : FormingGroup) {
693 CompatibleGroup =
false;
695 if (!CompatibleGroup || ++Count == GroupSize) {
696 FormingGroup.clear();
697 Color = ++NextReservedID;
698 if (!CompatibleGroup) {
699 ProposedColor =
Color;
700 FormingGroup.insert(SU->
NodeNum);
704 FormingGroup.insert(SU->
NodeNum);
706 CurrentColoring[SU->
NodeNum] = ProposedColor;
711 void SIScheduleBlockCreator::colorComputeReservedDependencies() {
712 unsigned DAGSize = DAG->
SUnits.size();
713 std::map<std::set<unsigned>,
unsigned> ColorCombinations;
715 CurrentTopDownReservedDependencyColoring.clear();
716 CurrentBottomUpReservedDependencyColoring.clear();
718 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
719 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
726 std::set<unsigned> SUColors;
729 if (CurrentColoring[SU->
NodeNum]) {
730 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
739 if (CurrentTopDownReservedDependencyColoring[Pred->
NodeNum] > 0)
740 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->
NodeNum]);
743 if (SUColors.empty())
746 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
747 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
750 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
751 ColorCombinations.find(SUColors);
752 if (Pos != ColorCombinations.end()) {
753 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] = Pos->second;
755 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
757 ColorCombinations[SUColors] = NextNonReservedID++;
762 ColorCombinations.clear();
768 std::set<unsigned> SUColors;
771 if (CurrentColoring[SU->
NodeNum]) {
772 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
781 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0)
782 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum]);
785 if (SUColors.empty())
788 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
789 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
792 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
793 ColorCombinations.find(SUColors);
794 if (Pos != ColorCombinations.end()) {
795 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] = Pos->second;
797 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
799 ColorCombinations[SUColors] = NextNonReservedID++;
805 void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
806 unsigned DAGSize = DAG->
SUnits.size();
807 std::map<std::pair<unsigned, unsigned>,
unsigned> ColorCombinations;
812 for (
unsigned i = 0, e = DAGSize;
i != e; ++
i) {
814 std::pair<unsigned, unsigned> SUColors;
817 if (CurrentColoring[SU->
NodeNum])
820 SUColors.first = CurrentTopDownReservedDependencyColoring[SU->
NodeNum];
821 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->
NodeNum];
823 std::map<std::pair<unsigned, unsigned>,
unsigned>::iterator Pos =
824 ColorCombinations.find(SUColors);
825 if (Pos != ColorCombinations.end()) {
826 CurrentColoring[SU->
NodeNum] = Pos->second;
828 CurrentColoring[SU->
NodeNum] = NextNonReservedID;
829 ColorCombinations[SUColors] = NextNonReservedID++;
834 void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
835 unsigned DAGSize = DAG->
SUnits.size();
836 std::vector<int> PendingColoring = CurrentColoring;
840 std::set<unsigned> SUColors;
841 std::set<unsigned> SUColorsPending;
843 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
846 if (CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] > 0 ||
847 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] > 0)
854 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0 ||
855 CurrentTopDownReservedDependencyColoring[Succ->
NodeNum] > 0)
856 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
857 SUColorsPending.insert(PendingColoring[Succ->
NodeNum]);
859 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
860 PendingColoring[SU->
NodeNum] = *SUColors.begin();
863 PendingColoring[SU->
NodeNum] = NextNonReservedID++;
865 CurrentColoring = PendingColoring;
869 void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
870 unsigned DAGSize = DAG->
SUnits.size();
871 unsigned PreviousColor;
872 std::set<unsigned> SeenColors;
877 PreviousColor = CurrentColoring[0];
879 for (
unsigned i = 1, e = DAGSize;
i != e; ++
i) {
881 unsigned CurrentColor = CurrentColoring[
i];
882 unsigned PreviousColorSave = PreviousColor;
885 if (CurrentColor != PreviousColor)
886 SeenColors.insert(PreviousColor);
887 PreviousColor = CurrentColor;
889 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
892 if (SeenColors.find(CurrentColor) == SeenColors.end())
895 if (PreviousColorSave != CurrentColor)
896 CurrentColoring[
i] = NextNonReservedID++;
898 CurrentColoring[
i] = CurrentColoring[
i-1];
902 void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
903 unsigned DAGSize = DAG->
SUnits.size();
907 std::set<unsigned> SUColors;
909 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
921 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
923 if (SUColors.size() == 1)
924 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
928 void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
929 unsigned DAGSize = DAG->
SUnits.size();
933 std::set<unsigned> SUColors;
935 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
942 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
944 if (SUColors.size() == 1)
945 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
949 void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
950 unsigned DAGSize = DAG->
SUnits.size();
954 std::set<unsigned> SUColors;
956 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
963 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
965 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
966 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
970 void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
971 unsigned DAGSize = DAG->
SUnits.size();
972 std::map<unsigned, unsigned> ColorCount;
976 unsigned color = CurrentColoring[SU->
NodeNum];
977 std::map<unsigned, unsigned>::iterator Pos = ColorCount.find(color);
978 if (Pos != ColorCount.end()) {
981 ColorCount[color] = 1;
987 unsigned color = CurrentColoring[SU->
NodeNum];
988 std::set<unsigned> SUColors;
990 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
993 if (ColorCount[color] > 1)
1000 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1002 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1003 --ColorCount[color];
1004 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1005 ++ColorCount[*SUColors.begin()];
1010 void SIScheduleBlockCreator::cutHugeBlocks() {
1014 void SIScheduleBlockCreator::regroupNoUserInstructions() {
1015 unsigned DAGSize = DAG->
SUnits.size();
1016 int GroupID = NextNonReservedID++;
1020 bool hasSuccessor =
false;
1022 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1029 hasSuccessor =
true;
1032 CurrentColoring[SU->
NodeNum] = GroupID;
1037 unsigned DAGSize = DAG->
SUnits.size();
1038 std::map<unsigned,unsigned> RealID;
1040 CurrentBlocks.clear();
1041 CurrentColoring.clear();
1042 CurrentColoring.resize(DAGSize, 0);
1043 Node2CurrentBlock.clear();
1049 NextNonReservedID = DAGSize + 1;
1051 DEBUG(
dbgs() <<
"Coloring the graph\n");
1054 colorHighLatenciesGroups();
1056 colorHighLatenciesAlone();
1057 colorComputeReservedDependencies();
1058 colorAccordingToReservedDependencies();
1059 colorEndsAccordingToDependencies();
1061 colorForceConsecutiveOrderInGroup();
1062 regroupNoUserInstructions();
1063 colorMergeConstantLoadsNextGroup();
1064 colorMergeIfPossibleNextGroupOnlyForReserved();
1067 Node2CurrentBlock.resize(DAGSize, -1);
1068 for (
unsigned i = 0, e = DAGSize;
i != e; ++
i) {
1070 unsigned Color = CurrentColoring[SU->
NodeNum];
1071 if (RealID.find(Color) == RealID.end()) {
1072 int ID = CurrentBlocks.size();
1073 BlockPtrs.push_back(llvm::make_unique<SIScheduleBlock>(DAG,
this, ID));
1074 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1077 CurrentBlocks[RealID[
Color]]->addUnit(SU);
1082 for (
unsigned i = 0, e = DAGSize;
i != e; ++
i) {
1084 int SUID = Node2CurrentBlock[
i];
1089 if (Node2CurrentBlock[Succ->
NodeNum] != SUID)
1090 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->
NodeNum]]);
1096 if (Node2CurrentBlock[Pred->
NodeNum] != SUID)
1097 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->
NodeNum]]);
1102 for (
unsigned i = 0, e = CurrentBlocks.size();
i != e; ++
i) {
1107 dbgs() <<
"Blocks created:\n\n";
1108 for (
unsigned i = 0, e = CurrentBlocks.size();
i != e; ++
i) {
1121 for (; I !=
End; ++
I) {
1122 if (!I->isDebugValue())
1128 void SIScheduleBlockCreator::topologicalSort() {
1129 unsigned DAGSize = CurrentBlocks.size();
1130 std::vector<int> WorkList;
1134 WorkList.reserve(DAGSize);
1135 TopDownIndex2Block.resize(DAGSize);
1136 TopDownBlock2Index.resize(DAGSize);
1137 BottomUpIndex2Block.resize(DAGSize);
1139 for (
unsigned i = 0, e = DAGSize;
i != e; ++
i) {
1141 unsigned Degree = Block->
getSuccs().size();
1142 TopDownBlock2Index[
i] = Degree;
1144 WorkList.push_back(
i);
1149 while (!WorkList.empty()) {
1150 int i = WorkList.back();
1152 WorkList.pop_back();
1153 TopDownBlock2Index[
i] = --
Id;
1154 TopDownIndex2Block[
Id] =
i;
1156 if (!--TopDownBlock2Index[Pred->getID()])
1157 WorkList.push_back(Pred->getID());
1163 for (
unsigned i = 0, e = DAGSize; i != e; ++
i) {
1166 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
1167 "Wrong Top Down topological sorting");
1172 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1173 TopDownIndex2Block.rend());
1176 void SIScheduleBlockCreator::scheduleInsideBlocks() {
1177 unsigned DAGSize = CurrentBlocks.size();
1179 DEBUG(
dbgs() <<
"\nScheduling Blocks\n\n");
1183 DEBUG(
dbgs() <<
"First phase: Fast scheduling for Reg Liveness\n");
1184 for (
unsigned i = 0, e = DAGSize; i != e; ++
i) {
1194 std::vector<MachineBasicBlock::iterator> PosOld;
1195 std::vector<MachineBasicBlock::iterator> PosNew;
1196 PosOld.reserve(DAG->
SUnits.size());
1197 PosNew.reserve(DAG->
SUnits.size());
1199 for (
unsigned i = 0, e = DAGSize; i != e; ++
i) {
1200 int BlockIndice = TopDownIndex2Block[
i];
1204 for (
SUnit* SU : SUs) {
1207 PosOld.push_back(Pos);
1208 if (&*CurrentTopFastSched == MI) {
1209 PosNew.push_back(Pos);
1210 CurrentTopFastSched =
nextIfDebug(++CurrentTopFastSched,
1222 PosNew.push_back(CurrentTopFastSched);
1231 for (
unsigned i = 0, e = DAGSize; i != e; ++
i) {
1234 Block->
schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1239 for (
unsigned i = PosOld.size(), e = 0; i != e; --
i) {
1252 for (
unsigned i = 0, e = CurrentBlocks.size(); i != e; ++
i) {
1259 void SIScheduleBlockCreator::fillStats() {
1260 unsigned DAGSize = CurrentBlocks.size();
1262 for (
unsigned i = 0, e = DAGSize; i != e; ++
i) {
1263 int BlockIndice = TopDownIndex2Block[
i];
1270 if (Depth < Pred->
Depth + 1)
1271 Depth = Pred->Depth + 1;
1277 for (
unsigned i = 0, e = DAGSize; i != e; ++
i) {
1278 int BlockIndice = BottomUpIndex2Block[
i];
1283 unsigned Height = 0;
1285 if (Height < Succ->Height + 1)
1286 Height = Succ->Height + 1;
1298 DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
1299 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1300 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1312 LiveOutRegsNumUsages.resize(Blocks.size());
1313 for (
unsigned i = 0, e = Blocks.size(); i != e; ++
i) {
1315 for (
unsigned Reg : Block->
getInRegs()) {
1319 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1320 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1322 if (RegPos != PredOutRegs.end()) {
1334 std::map<unsigned, unsigned>::iterator RegPos =
1335 LiveOutRegsNumUsages[PredID].find(Reg);
1336 if (RegPos != LiveOutRegsNumUsages[PredID].
end()) {
1337 ++LiveOutRegsNumUsages[PredID][
Reg];
1339 LiveOutRegsNumUsages[PredID][
Reg] = 1;
1344 LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
1345 BlockNumPredsLeft.resize(Blocks.size());
1346 BlockNumSuccsLeft.resize(Blocks.size());
1348 for (
unsigned i = 0, e = Blocks.size(); i != e; ++
i) {
1350 BlockNumPredsLeft[
i] = Block->
getPreds().size();
1351 BlockNumSuccsLeft[
i] = Block->
getSuccs().size();
1355 for (
unsigned i = 0, e = Blocks.size(); i != e; ++
i) {
1361 std::set<unsigned> InRegs = DAG->
getInRegs();
1362 addLiveRegs(InRegs);
1366 for (
unsigned i = 0, e = Blocks.size(); i != e; ++
i) {
1368 for (
unsigned Reg : Block->
getInRegs()) {
1371 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1372 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1374 if (RegPos != PredOutRegs.end()) {
1381 if (LiveRegsConsumers.find(Reg) == LiveRegsConsumers.end())
1382 LiveRegsConsumers[Reg] = 1;
1384 ++LiveRegsConsumers[
Reg];
1389 for (
unsigned i = 0, e = Blocks.size(); i != e; ++
i) {
1391 if (BlockNumPredsLeft[i] == 0) {
1392 ReadyBlocks.push_back(Block);
1397 BlocksScheduled.push_back(Block);
1398 blockScheduled(Block);
1402 dbgs() <<
"Block Order:";
1409 bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1410 SIBlockSchedCandidate &TryCand) {
1411 if (!Cand.isValid()) {
1417 if (
tryLess(TryCand.LastPosHighLatParentScheduled,
1418 Cand.LastPosHighLatParentScheduled, TryCand, Cand,
Latency))
1421 if (
tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
1424 if (TryCand.IsHighLatency &&
tryGreater(TryCand.Height, Cand.Height,
1425 TryCand, Cand,
Depth))
1427 if (
tryGreater(TryCand.NumHighLatencySuccessors,
1428 Cand.NumHighLatencySuccessors,
1434 bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1435 SIBlockSchedCandidate &TryCand) {
1436 if (!Cand.isValid()) {
1441 if (
tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
1445 Cand.NumSuccessors > 0,
1450 if (
tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
1457 SIBlockSchedCandidate Cand;
1458 std::vector<SIScheduleBlock*>::iterator Best;
1460 if (ReadyBlocks.empty())
1464 VregCurrentUsage, SregCurrentUsage);
1465 if (VregCurrentUsage > maxVregUsage)
1466 maxVregUsage = VregCurrentUsage;
1467 if (VregCurrentUsage > maxSregUsage)
1468 maxSregUsage = VregCurrentUsage;
1470 dbgs() <<
"Picking New Blocks\n";
1471 dbgs() <<
"Available: ";
1474 dbgs() <<
"\nCurrent Live:\n";
1475 for (
unsigned Reg : LiveRegs)
1478 dbgs() <<
"Current VGPRs: " << VregCurrentUsage <<
'\n';
1479 dbgs() <<
"Current SGPRs: " << SregCurrentUsage <<
'\n';
1482 Cand.Block =
nullptr;
1483 for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
1484 E = ReadyBlocks.end(); I !=
E; ++
I) {
1485 SIBlockSchedCandidate TryCand;
1487 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1488 TryCand.VGPRUsageDiff =
1489 checkRegUsageImpact(TryCand.Block->getInRegs(),
1491 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1492 TryCand.NumHighLatencySuccessors =
1493 TryCand.Block->getNumHighLatencySuccessors();
1494 TryCand.LastPosHighLatParentScheduled =
1495 (
unsigned int) std::max<int> (0,
1496 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1497 LastPosWaitedHighLatency);
1498 TryCand.Height = TryCand.Block->Height;
1500 if (VregCurrentUsage > 120 ||
1502 if (!tryCandidateRegUsage(Cand, TryCand) &&
1504 tryCandidateLatency(Cand, TryCand);
1506 if (!tryCandidateLatency(Cand, TryCand))
1507 tryCandidateRegUsage(Cand, TryCand);
1509 if (TryCand.Reason !=
NoCand) {
1510 Cand.setBest(TryCand);
1512 DEBUG(
dbgs() <<
"Best Current Choice: " << Cand.Block->getID() <<
' '
1518 dbgs() <<
"Picking: " << Cand.Block->getID() <<
'\n';
1519 dbgs() <<
"Is a block with high latency instruction: "
1520 << (Cand.IsHighLatency ?
"yes\n" :
"no\n");
1521 dbgs() <<
"Position of last high latency dependency: "
1522 << Cand.LastPosHighLatParentScheduled <<
'\n';
1523 dbgs() <<
"VGPRUsageDiff: " << Cand.VGPRUsageDiff <<
'\n';
1528 ReadyBlocks.erase(Best);
1534 void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &
Regs) {
1535 for (
unsigned Reg : Regs) {
1540 (void) LiveRegs.insert(Reg);
1544 void SIScheduleBlockScheduler::decreaseLiveRegs(
SIScheduleBlock *Block,
1545 std::set<unsigned> &Regs) {
1546 for (
unsigned Reg : Regs) {
1548 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1549 assert (Pos != LiveRegs.end() &&
1550 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
1551 LiveRegsConsumers[
Reg] >= 1);
1552 --LiveRegsConsumers[
Reg];
1553 if (LiveRegsConsumers[Reg] == 0)
1554 LiveRegs.erase(Pos);
1558 void SIScheduleBlockScheduler::releaseBlockSuccs(
SIScheduleBlock *Parent) {
1560 --BlockNumPredsLeft[Block->
getID()];
1561 if (BlockNumPredsLeft[Block->
getID()] == 0) {
1562 ReadyBlocks.push_back(Block);
1569 LastPosHighLatencyParentScheduled[Block->
getID()] = NumBlockScheduled;
1573 void SIScheduleBlockScheduler::blockScheduled(
SIScheduleBlock *Block) {
1574 decreaseLiveRegs(Block, Block->
getInRegs());
1576 releaseBlockSuccs(Block);
1577 for (std::map<unsigned, unsigned>::iterator RegI =
1578 LiveOutRegsNumUsages[Block->
getID()].begin(),
1579 E = LiveOutRegsNumUsages[Block->
getID()].end(); RegI !=
E; ++RegI) {
1580 std::pair<unsigned, unsigned> RegP = *RegI;
1581 if (LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end())
1582 LiveRegsConsumers[RegP.first] = RegP.second;
1584 assert(LiveRegsConsumers[RegP.first] == 0);
1585 LiveRegsConsumers[RegP.first] += RegP.second;
1588 if (LastPosHighLatencyParentScheduled[Block->
getID()] >
1589 (
unsigned)LastPosWaitedHighLatency)
1590 LastPosWaitedHighLatency =
1591 LastPosHighLatencyParentScheduled[Block->
getID()];
1592 ++NumBlockScheduled;
1596 SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1597 std::set<unsigned> &OutRegs) {
1598 std::vector<int> DiffSetPressure;
1601 for (
unsigned Reg : InRegs) {
1605 if (LiveRegsConsumers[Reg] > 1)
1608 for (; PSetI.
isValid(); ++PSetI) {
1609 DiffSetPressure[*PSetI] -= PSetI.
getWeight();
1613 for (
unsigned Reg : OutRegs) {
1618 for (; PSetI.
isValid(); ++PSetI) {
1619 DiffSetPressure[*PSetI] += PSetI.
getWeight();
1623 return DiffSetPressure;
1629 SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1630 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1633 std::vector<SIScheduleBlock*> ScheduledBlocks;
1636 ScheduledBlocks = Scheduler.
getBlocks();
1638 for (
unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
1642 for (
SUnit* SU : SUs)
1667 void SIScheduleDAGMI::topologicalSort() {
1680 void SIScheduleDAGMI::moveLowLatencies() {
1681 unsigned DAGSize = SUnits.size();
1682 int LastLowLatencyUser = -1;
1683 int LastLowLatencyPos = -1;
1685 for (
unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++
i) {
1686 SUnit *SU = &SUnits[ScheduledSUnits[
i]];
1687 bool IsLowLatencyUser =
false;
1688 unsigned MinPos = 0;
1693 IsLowLatencyUser =
true;
1697 unsigned PredPos = ScheduledSUnitsInv[Pred->
NodeNum];
1698 if (PredPos >= MinPos)
1699 MinPos = PredPos + 1;
1703 unsigned BestPos = LastLowLatencyUser + 1;
1704 if ((
int)BestPos <= LastLowLatencyPos)
1705 BestPos = LastLowLatencyPos + 1;
1706 if (BestPos < MinPos)
1709 for (
unsigned u = i; u > BestPos; --u) {
1710 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1711 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1713 ScheduledSUnits[BestPos] = SU->
NodeNum;
1714 ScheduledSUnitsInv[SU->
NodeNum] = BestPos;
1716 LastLowLatencyPos = BestPos;
1717 if (IsLowLatencyUser)
1718 LastLowLatencyUser = BestPos;
1719 }
else if (IsLowLatencyUser) {
1720 LastLowLatencyUser =
i;
1724 bool CopyForLowLat =
false;
1728 CopyForLowLat =
true;
1734 for (
unsigned u = i; u > MinPos; --u) {
1735 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1736 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1738 ScheduledSUnits[MinPos] = SU->
NodeNum;
1739 ScheduledSUnitsInv[SU->
NodeNum] = MinPos;
1746 for (
unsigned i = 0, e = SUnits.size(); i != e; ++
i) {
1747 SUnits[
i].isScheduled =
false;
1748 SUnits[
i].WeakPredsLeft = SUnitsLinksBackup[
i].WeakPredsLeft;
1749 SUnits[
i].NumPredsLeft = SUnitsLinksBackup[
i].NumPredsLeft;
1750 SUnits[
i].WeakSuccsLeft = SUnitsLinksBackup[
i].WeakSuccsLeft;
1751 SUnits[
i].NumSuccsLeft = SUnitsLinksBackup[
i].NumSuccsLeft;
1756 template<
typename _Iterator>
void
1758 unsigned &VgprUsage,
unsigned &SgprUsage) {
1761 for (_Iterator RegI = First; RegI !=
End; ++RegI) {
1762 unsigned Reg = *RegI;
1767 for (; PSetI.
isValid(); ++PSetI) {
1768 if (*PSetI == VGPRSetID)
1770 else if (*PSetI == SGPRSetID)
1780 DEBUG(
dbgs() <<
"Preparing Scheduling\n");
1784 for(
SUnit& SU : SUnits)
1799 SUnitsLinksBackup =
SUnits;
1808 for (
unsigned i = 0, e = (
unsigned)SUnits.size(); i != e; ++
i) {
1810 unsigned BaseLatReg;
1828 std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
1838 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1847 std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
1857 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1864 ScheduledSUnits = Best.
SUs;
1865 ScheduledSUnitsInv.resize(SUnits.size());
1867 for (
unsigned i = 0, e = (
unsigned)SUnits.size(); i != e; ++
i) {
1868 ScheduledSUnitsInv[ScheduledSUnits[
i]] =
i;
1878 for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(),
1879 E = ScheduledSUnits.end(); I !=
E; ++
I) {
1893 unsigned BBNum =
begin()->getParent()->getNumber();
1894 dbgs() <<
"*** Final schedule for BB#" << BBNum <<
" ***\n";
const_iterator end(StringRef path)
Get end iterator over path.
Interface definition for SIRegisterInfo.
virtual unsigned getNumRegPressureSets() const =0
Get the number of dimensions of register pressure.
std::vector< SIScheduleBlock * > Blocks
SIScheduleDAGMI(MachineSchedContext *C)
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries...
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
SIScheduleCandReason Reason
MachineInstr * getInstr() const
getInstr - Return the representative MachineInstr for this SUnit.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
std::vector< unsigned > IsLowLatencySU
void addUnit(SUnit *SU)
Functions for Block construction.
std::vector< SIScheduleBlock * > getBlocks()
static MachineBasicBlock::iterator nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::const_iterator End)
Non-const version.
MachineBasicBlock::iterator begin() const
begin - Return an iterator to the top of the current scheduling region.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
std::unique_ptr< MachineSchedStrategy > SchedImpl
SmallVector< SDep, 4 > Preds
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void dumpAll(const ScheduleDAG *G) const
std::vector< unsigned > LowLatencyOffset
static bool tryGreater(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
bool isWeak() const
isWeak - Test if this is a weak dependence.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly...
Reg
All possible values of the reg field in the ModR/M byte.
void InitDAGTopologicalSorting()
InitDAGTopologicalSorting - create the initial topological ordering from the DAG to be scheduled...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
static const char * getReasonStr(SIScheduleCandReason Reason)
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
std::set< unsigned > getInRegs()
static bool tryLess(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
static def_instr_iterator def_instr_end()
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
unsigned getSGPRSetID() const
std::vector< int > TopDownIndex2Block
struct SIScheduleBlockResult scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, SISchedulerBlockSchedulerVariant ScheduleVariant)
unsigned getVGPRPressureSet() const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
SI Machine Scheduler interface.
void printDebug(bool Full)
bool isDebugValue() const
SDep - Scheduling dependency.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const final
reverse_iterator rbegin()
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
unsigned const MachineRegisterInfo * MRI
~SIScheduleDAGMI() override
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&...args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
bool isSUInBlock(SUnit *SU, unsigned ID)
void dumpSchedule() const
Dump the scheduled sequence.
std::vector< SUnit * > getScheduledUnits()
std::vector< int > TopDownIndex2SU
MachineRegisterInfo * getMRI()
MachineBasicBlock * getBB()
static const unsigned End
Track the current register pressure at some position in the instruction stream, and remember the high...
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
for(unsigned i=0, e=MI->getNumOperands();i!=e;++i)
const TargetRegisterInfo * getTRI()
SIScheduleBlocks getBlocks(SISchedulerBlockCreatorVariant BlockVariant)
bool isHighLatencyInstruction(const MachineInstr &MI) const
MachineBasicBlock::iterator getCurrentBottom()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void schedule(MachineBasicBlock::iterator BeginBlock, MachineBasicBlock::iterator EndBlock)
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
void advance()
Advance across the current instruction.
std::set< unsigned > & getOutRegs()
Color
A "color", which is either even or odd.
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
void addSucc(SIScheduleBlock *Succ)
std::vector< unsigned > IsHighLatencySU
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
std::vector< unsigned > SUs
SISchedulerBlockCreatorVariant
unsigned getWeight() const
SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineBasicBlock::iterator getCurrentTop()
void fillVgprSgprCost(_Iterator First, _Iterator End, unsigned &VgprUsage, unsigned &SgprUsage)
bool isHighLatencyBlock()
Machine Instruction Scheduler
const std::vector< SIScheduleBlock * > & getSuccs() const
unsigned getVGPRSetID() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
Representation of each machine instruction.
Interface definition for SIInstrInfo.
void addPred(SIScheduleBlock *Pred)
const TargetRegisterInfo * TRI
def_instr_iterator def_instr_begin(unsigned RegNo) const
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
std::vector< int > BottomUpIndex2SU
Iterate over the pressure sets affected by the given physical or virtual register.
PSetIterator getPressureSets(unsigned RegUnit) const
Get an iterator over the pressure sets affected by the given physical or virtual register.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
static bool isDefBetween(unsigned Reg, SlotIndex First, SlotIndex Last, const MachineRegisterInfo *MRI, const LiveIntervals *LIS)
bool isLowLatencyInstruction(const MachineInstr &MI) const
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
const TargetInstrInfo * TII
void restoreSULinksLeft()
unsigned getSGPRPressureSet() const
void initRPTracker(RegPressureTracker &RPTracker)
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def...
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
std::set< unsigned > & getInRegs()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::vector< int > TopDownBlock2Index
void setRepeat(SIScheduleCandReason R)
const std::vector< SIScheduleBlock * > & getPreds() const
SmallVector< SDep, 4 > Succs
~SIScheduleBlockCreator()
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
void setPos(MachineBasicBlock::const_iterator Pos)
std::vector< SUnit > SUnits
SlotIndex - An opaque wrapper around machine indexes.
Printable PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
RegPressureTracker TopRPTracker
SISchedulerBlockSchedulerVariant
void dump(const ScheduleDAG *G) const
SUnit - Scheduling unit.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
handleMove - call this method to notify LiveIntervals that instruction 'mi' has been moved within a basic block.
SUnit - Scheduling unit. This is a node in the scheduling DAG.