36 #define DEBUG_TYPE "structcfg"
38 #define DEFAULT_VEC_SLOTS 8
48 STATISTIC(numSerialPatternMatch,
"CFGStructurizer number of serial pattern "
50 STATISTIC(numIfPatternMatch,
"CFGStructurizer number of if pattern "
52 STATISTIC(numClonedBlock,
"CFGStructurizer cloned blocks");
53 STATISTIC(numClonedInstr,
"CFGStructurizer cloned instructions");
65 #define SHOWNEWINSTR(i) \
66 DEBUG(dbgs() << "New instr: " << *i << "\n");
68 #define SHOWNEWBLK(b, msg) \
70 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
74 #define SHOWBLK_DETAIL(b, msg) \
77 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
83 #define INVALIDSCCNUM -1
87 size_t sz = Src.
size();
88 for (
size_t i = 0;
i < sz/2; ++
i) {
90 Src[
i] = Src[sz -
i - 1];
106 class BlockInformation {
125 typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
126 typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
130 SinglePath_InPath = 1,
131 SinglePath_NotInPath = 2
136 AMDGPUCFGStructurizer() :
142 return "AMDGPU Control Flow Graph structurizer Pass";
167 MLI = &getAnalysis<MachineLoopInfo>();
168 DEBUG(
dbgs() <<
"LoopInfo:\n"; PrintLoopinfo(*MLI););
169 MDT = &getAnalysis<MachineDominatorTree>();
171 PDT = &getAnalysis<MachinePostDominatorTree>();
188 void printOrderedBlocks()
const {
191 iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++
i) {
192 dbgs() <<
"BB" << (*iterBlk)->getNumber();
193 dbgs() <<
"(" << getSCCNum(*iterBlk) <<
"," << (*iterBlk)->size() <<
")";
194 if (i != 0 && i % 10 == 0) {
203 iterEnd = LoopInfo.
end(); iter != iterEnd; ++iter) {
204 (*iter)->print(
dbgs(), 0);
215 bool AllowSideEntry =
true)
const;
237 static int getBranchNzeroOpcode(
int OldOpcode);
238 static int getBranchZeroOpcode(
int OldOpcode);
239 static int getContinueNzeroOpcode(
int OldOpcode);
240 static int getContinueZeroOpcode(
int OldOpcode);
271 int loopendPatternMatch();
333 MBBInfoMap BlockInfoMap;
334 LoopLandInfoMap LLInfoMap;
335 std::map<MachineLoop *, bool> Visited;
341 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
342 if (It == BlockInfoMap.end())
344 return (*It).second->SccNum;
349 LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
350 if (It == LLInfoMap.end())
364 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
365 if (It == BlockInfoMap.end())
367 return (*It).second->IsRetired;
376 if (!isRetiredBlock(LoopLand))
382 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
384 bool AllowSideEntry)
const {
386 if (SrcMBB == DstMBB)
387 return SinglePath_InPath;
388 while (SrcMBB && SrcMBB->
succ_size() == 1) {
390 if (SrcMBB == DstMBB)
391 return SinglePath_InPath;
392 if (!AllowSideEntry && SrcMBB->
pred_size() > 1)
393 return Not_SinglePath;
396 return SinglePath_NotInPath;
397 return Not_SinglePath;
404 if (!isRetiredBlock(*It))
412 unsigned BlockSizeThreshold = 30;
413 unsigned CloneInstrThreshold = 100;
414 bool MultiplePreds = MBB && (MBB->
pred_size() > 1);
418 unsigned BlkSize = MBB->
size();
419 return ((BlkSize > BlockSizeThreshold) &&
420 (BlkSize * (MBB->
pred_size() - 1) > CloneInstrThreshold));
423 void AMDGPUCFGStructurizer::reversePredicateSetter(
429 if (I->getOpcode() == AMDGPU::PRED_X) {
430 switch (I->getOperand(2).getImm()) {
431 case AMDGPU::PRED_SETE_INT:
432 I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
434 case AMDGPU::PRED_SETNE_INT:
435 I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
437 case AMDGPU::PRED_SETE:
438 I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
440 case AMDGPU::PRED_SETNE:
441 I->getOperand(2).setImm(AMDGPU::PRED_SETE);
451 int NewOpcode,
const DebugLoc &DL) {
484 void AMDGPUCFGStructurizer::insertCondBranchBefore(
497 void AMDGPUCFGStructurizer::insertCondBranchBefore(
508 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(
int OldOpcode) {
510 case AMDGPU::JUMP_COND:
511 case AMDGPU::JUMP:
return AMDGPU::IF_PREDICATE_SET;
512 case AMDGPU::BRANCH_COND_i32:
513 case AMDGPU::BRANCH_COND_f32:
return AMDGPU::IF_LOGICALNZ_f32;
519 int AMDGPUCFGStructurizer::getBranchZeroOpcode(
int OldOpcode) {
521 case AMDGPU::JUMP_COND:
522 case AMDGPU::JUMP:
return AMDGPU::IF_PREDICATE_SET;
523 case AMDGPU::BRANCH_COND_i32:
524 case AMDGPU::BRANCH_COND_f32:
return AMDGPU::IF_LOGICALZ_f32;
530 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(
int OldOpcode) {
532 case AMDGPU::JUMP_COND:
533 case AMDGPU::JUMP:
return AMDGPU::CONTINUE_LOGICALNZ_i32;
539 int AMDGPUCFGStructurizer::getContinueZeroOpcode(
int OldOpcode) {
541 case AMDGPU::JUMP_COND:
542 case AMDGPU::JUMP:
return AMDGPU::CONTINUE_LOGICALZ_i32;
552 void AMDGPUCFGStructurizer::setTrueBranch(
MachineInstr *MI,
565 return (*It == TrueBranch) ? *Next : *It;
568 bool AMDGPUCFGStructurizer::isCondBranch(
MachineInstr *MI) {
570 case AMDGPU::JUMP_COND:
571 case AMDGPU::BRANCH_COND_i32:
572 case AMDGPU::BRANCH_COND_f32:
return true;
579 bool AMDGPUCFGStructurizer::isUncondBranch(
MachineInstr *MI) {
602 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
606 if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
611 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
618 if (isCondBranch(MI) || isUncondBranch(MI))
629 if (It != MBB->
rend()) {
645 <<
" is return block without RETURN instr\n";);
652 iterEnd = SrcMBB->
succ_end(); It != iterEnd; ++It)
665 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
668 MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
669 if (BranchMI && isCondBranch(BranchMI) &&
670 getTrueBranch(BranchMI) == OldMBB)
671 setTrueBranch(BranchMI, NewBlk);
677 &&
"found a jump table");
685 if (Pre->getOpcode() == AMDGPU::CONTINUE
686 && It->getOpcode() == AMDGPU::ENDLOOP)
693 for (
unsigned i = 0; i < ContInstr.
size(); ++
i)
694 ContInstr[i]->eraseFromParent();
704 bool AMDGPUCFGStructurizer::prepare() {
705 bool Changed =
false;
709 DEBUG(
dbgs() <<
"AMDGPUCFGStructurizer::prepare\n";);
711 orderBlocks(FuncRep);
717 E = MLI->end(); It !=
E; ++It) {
722 if (ExitingMBBs.
size() == 0) {
732 It = OrderedBlks.begin(), E = OrderedBlks.end(); It !=
E; ++It) {
734 removeUnconditionalBranch(MBB);
735 removeRedundantConditionalBranch(MBB);
736 if (isReturnBlock(MBB)) {
742 if (RetBlks.
size() >= 2) {
743 addDummyExitBlock(RetBlks);
750 bool AMDGPUCFGStructurizer::run() {
753 DEBUG(
dbgs() <<
"AMDGPUCFGStructurizer::run\n");
757 ReverseVector(orderedBlks);
760 DEBUG(
dbgs() <<
"Ordered blocks:\n"; printOrderedBlocks(););
764 bool MakeProgress =
false;
765 int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
771 dbgs() <<
"numIter = " << NumIter
772 <<
", numRemaintedBlk = " << NumRemainedBlk <<
"\n";
794 SccNumBlk = NumRemainedBlk;
796 dbgs() <<
"start processing SCC" << getSCCNum(SccBeginMBB);
801 if (!isRetiredBlock(MBB))
806 bool ContNextScc =
true;
808 || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
811 int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
812 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
814 dbgs() <<
"Can't reduce SCC " << getSCCNum(MBB)
815 <<
", sccNumIter = " << SccNumIter;
816 dbgs() <<
"doesn't make any progress\n";
819 }
else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
820 SccNumBlk = sccRemainedNumBlk;
824 dbgs() <<
"repeat processing SCC" << getSCCNum(MBB)
825 <<
"sccNumIter = " << SccNumIter <<
'\n';
837 SccBeginMBB =
nullptr;
845 dbgs() <<
"Reduce to one block\n";
848 int NewnumRemainedBlk
849 = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
851 if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
853 NumRemainedBlk = NewnumRemainedBlk;
855 MakeProgress =
false;
857 dbgs() <<
"No progress\n";
861 }
while (!Finish && MakeProgress);
867 for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
869 if ((*It).second && (*It).second->IsRetired) {
870 assert(((*It).first)->getNumber() != -1);
872 dbgs() <<
"Erase BB" << ((*It).first)->getNumber() <<
"\n";
874 (*It).first->eraseFromParent();
878 BlockInfoMap.clear();
882 DEBUG(FuncRep->viewCFG());
896 const std::vector<MachineBasicBlock *> &SccNext = *It;
897 for (std::vector<MachineBasicBlock *>::const_iterator
898 blockIter = SccNext.begin(), blockEnd = SccNext.end();
899 blockIter != blockEnd; ++blockIter) {
902 recordSccnum(MBB, SccNum);
908 auto It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
909 for (; It !=
E; ++It) {
911 SccNum = getSCCNum(MBB);
913 dbgs() <<
"unreachable block BB" << MBB->
getNumber() <<
"\n";
922 dbgs() <<
"Begin patternMatch BB" << MBB->
getNumber() <<
"\n";
925 while ((CurMatch = patternMatchGroup(MBB)) > 0)
926 NumMatch += CurMatch;
930 <<
", numMatch = " << NumMatch <<
"\n";
938 NumMatch += loopendPatternMatch();
939 NumMatch += serialPatternMatch(MBB);
940 NumMatch += ifPatternMatch(MBB);
950 if (childBlk->
pred_size() != 1 || isActiveLoophead(childBlk))
953 mergeSerialBlock(MBB, childBlk);
954 ++numSerialPatternMatch;
962 if (hasBackEdge(MBB))
964 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
968 assert(isCondBranch(BranchMI));
972 NumMatch += serialPatternMatch(TrueMBB);
973 NumMatch += ifPatternMatch(TrueMBB);
975 NumMatch += serialPatternMatch(FalseMBB);
976 NumMatch += ifPatternMatch(FalseMBB);
995 reversePredicateSetter(MBB->
end(), *
MBB);
999 && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
1002 && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
1005 return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
1013 || (FalseMBB && FalseMBB->
pred_size() > 1))) {
1014 Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
1017 if (TrueMBB && TrueMBB->
pred_size() > 1) {
1018 TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
1022 if (FalseMBB && FalseMBB->
pred_size() > 1) {
1023 FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1027 mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1029 ++numIfPatternMatch;
1031 numClonedBlock += Cloned;
1033 return 1 + Cloned + NumMatch;
1036 int AMDGPUCFGStructurizer::loopendPatternMatch() {
1037 std::deque<MachineLoop *> NestedLoops;
1038 for (
auto &It: *MLI)
1040 NestedLoops.push_front(ML);
1042 if (NestedLoops.size() == 0)
1050 if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1052 DEBUG(
dbgs() <<
"Processing:\n"; ExaminedLoop->dump(););
1053 int NumBreak = mergeLoop(ExaminedLoop);
1061 int AMDGPUCFGStructurizer::mergeLoop(
MachineLoop *LoopRep) {
1065 assert(!ExitingMBBs.
empty() &&
"Infinite Loop not supported");
1066 DEBUG(
dbgs() <<
"Loop has " << ExitingMBBs.
size() <<
" exiting blocks\n";);
1071 for (
unsigned i = 0, e = ExitBlks.size(); i < e; ++
i)
1072 ExitBlkSet.
insert(ExitBlks[i]);
1075 assert(ExitBlk &&
"Loop has several exit block");
1078 InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
1079 PE = InvMBBTraits::child_end(LoopHeader);
1080 for (; PI != PE; PI++) {
1085 for (
unsigned i = 0, e = ExitingMBBs.
size(); i < e; ++
i)
1086 mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1087 for (
unsigned i = 0, e = LatchBlks.
size(); i < e; ++
i)
1088 settleLoopcontBlock(LatchBlks[i], LoopHeader);
1092 Match += serialPatternMatch(LoopHeader);
1093 Match += ifPatternMatch(LoopHeader);
1094 }
while (Match > 0);
1095 mergeLooplandBlock(LoopHeader, ExitBlk);
1098 MLI->changeLoopFor(LoopHeader, ParentLoop);
1100 MLI->removeBlock(LoopHeader);
1101 Visited[LoopRep] =
true;
1105 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1109 if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1113 dbgs() <<
"isLoopContBreakBlock yes src1 = BB"
1115 <<
" src2 = BB" << Src2MBB->
getNumber() <<
"\n";
1126 int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1129 dbgs() <<
"handleJumpintoIf swap trueBlk and FalseBlk" <<
"\n";
1131 Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1145 dbgs() <<
"handleJumpintoIfImp head = BB" << HeadMBB->
getNumber()
1148 <<
" false = BB" << FalseMBB->
getNumber() <<
"\n";
1156 if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1158 dbgs() <<
" working\n";
1161 Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1162 Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1164 numClonedBlock += Num;
1165 Num += serialPatternMatch(*HeadMBB->
succ_begin());
1166 Num += serialPatternMatch(*std::next(HeadMBB->
succ_begin()));
1167 Num += ifPatternMatch(HeadMBB);
1173 dbgs() <<
" not working\n";
1175 DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) :
nullptr;
1181 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1185 <<
" size = " << HeadMBB->
size();
1193 dbgs() <<
", true = BB" << TrueMBB->
getNumber() <<
" size = "
1194 << TrueMBB->
size() <<
" numPred = " << TrueMBB->
pred_size();
1202 dbgs() <<
", false = BB" << FalseMBB->
getNumber() <<
" size = "
1203 << FalseMBB->
size() <<
" numPred = " << FalseMBB->
pred_size();
1211 dbgs() <<
", land = BB" << LandMBB->
getNumber() <<
" size = "
1212 << LandMBB->
size() <<
" numPred = " << LandMBB->
pred_size();
1226 bool MigrateTrue =
false;
1227 bool MigrateFalse =
false;
1232 && (!FalseMBB || FalseMBB->
succ_size() <= 1));
1234 if (TrueMBB == FalseMBB)
1237 MigrateTrue = needMigrateBlock(TrueMBB);
1238 MigrateFalse = needMigrateBlock(FalseMBB);
1240 if (!MigrateTrue && !MigrateFalse)
1246 if (!MigrateTrue && TrueMBB && TrueMBB->
pred_size() > 1)
1248 if (!MigrateFalse && FalseMBB && FalseMBB->
pred_size() > 1)
1249 MigrateFalse =
true;
1252 dbgs() <<
"before improveSimpleJumpintoIf: ";
1253 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1268 if (!MigrateTrue || !MigrateFalse) {
1339 bool LandBlkHasOtherPred = (LandBlk->
pred_size() > 2);
1344 if (LandBlkHasOtherPred) {
1346 unsigned CmpResReg =
1349 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
1357 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
1361 migrateInstruction(TrueMBB, LandBlk, I);
1367 insertInstrBefore(I, AMDGPU::ELSE);
1370 migrateInstruction(FalseMBB, LandBlk, I);
1377 if (LandBlkHasOtherPred) {
1379 insertInstrBefore(I, AMDGPU::ENDIF);
1383 PE = LandBlk->
pred_end(); PI != PE; ++PI) {
1385 if (MBB != TrueMBB && MBB != FalseMBB)
1390 dbgs() <<
"result from improveSimpleJumpintoIf: ";
1391 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1395 *LandMBBPtr = LandBlk;
1404 <<
" <= BB" << SrcMBB->
getNumber() <<
"\n";
1409 cloneSuccessorList(DstMBB, SrcMBB);
1411 removeSuccessor(SrcMBB);
1412 MLI->removeBlock(SrcMBB);
1413 retireBlock(SrcMBB);
1416 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(
MachineInstr *BranchMI,
1426 dbgs() <<
" } else ";
1432 dbgs() <<
"landBlock: ";
1453 insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1461 retireBlock(TrueMBB);
1462 MLI->removeBlock(TrueMBB);
1466 insertInstrBefore(I, AMDGPU::ELSE);
1470 if (LandMBB && FalseMBB->
succ_size() != 0)
1472 retireBlock(FalseMBB);
1473 MLI->removeBlock(FalseMBB);
1475 insertInstrBefore(I, AMDGPU::ENDIF);
1479 if (LandMBB && TrueMBB && FalseMBB)
1487 <<
" land = BB" << LandMBB->
getNumber() <<
"\n";);
1489 insertInstrBefore(DstBlk, AMDGPU::WHILELOOP,
DebugLoc());
1490 insertInstrEnd(DstBlk, AMDGPU::ENDLOOP,
DebugLoc());
1498 <<
" land = BB" << LandMBB->
getNumber() <<
"\n";);
1499 MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1500 assert(BranchMI && isCondBranch(BranchMI));
1501 DebugLoc DL = BranchMI->getDebugLoc();
1504 if (TrueBranch != LandMBB)
1505 reversePredicateSetter(I, *I->getParent());
1506 insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
1507 insertInstrBefore(I, AMDGPU::BREAK);
1508 insertInstrBefore(I, AMDGPU::ENDIF);
1510 BranchMI->eraseFromParent();
1517 DEBUG(
dbgs() <<
"settleLoopcontBlock conting = BB"
1519 <<
", cont = BB" << ContMBB->
getNumber() <<
"\n";);
1521 MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1523 assert(isCondBranch(MI));
1526 int OldOpcode = MI->getOpcode();
1529 bool UseContinueLogical = ((&*ContingMBB->
rbegin()) == MI);
1531 if (!UseContinueLogical) {
1533 TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1534 getBranchZeroOpcode(OldOpcode);
1535 insertCondBranchBefore(I, BranchOpcode, DL);
1537 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
1538 insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
1541 TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1542 getContinueZeroOpcode(OldOpcode);
1543 insertCondBranchBefore(I, BranchOpcode, DL);
1546 MI->eraseFromParent();
1553 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
1554 getLastDebugLocInBB(ContingMBB));
1562 while (SrcMBB && SrcMBB != DstMBB) {
1565 SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1580 "succBlk is not a prececessor of curBlk");
1583 replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1589 cloneSuccessorList(CloneMBB, MBB);
1591 numClonedInstr += MBB->
size();
1594 dbgs() <<
"Cloned block: " <<
"BB"
1598 SHOWNEWBLK(CloneMBB,
"result of Cloned block: ");
1607 MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1610 dbgs() <<
"migrateInstruction don't see branch instr\n" ;
1612 SpliceEnd = SrcMBB->
end();
1614 DEBUG(
dbgs() <<
"migrateInstruction see branch instr: " << *BranchMI);
1615 SpliceEnd = BranchMI;
1618 dbgs() <<
"migrateInstruction before splice dstSize = " << DstMBB->
size()
1619 <<
"srcSize = " << SrcMBB->
size() <<
"\n";
1623 DstMBB->
splice(I, SrcMBB, SrcMBB->
begin(), SpliceEnd);
1626 dbgs() <<
"migrateInstruction after splice dstSize = " << DstMBB->
size()
1627 <<
"srcSize = " << SrcMBB->
size() <<
'\n';
1632 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(
MachineLoop* LoopRep) {
1636 if (!LoopHeader || !LoopLatch)
1638 MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1640 if (!BranchMI || !isUncondBranch(BranchMI))
1645 SHOWNEWBLK(DummyExitBlk,
"DummyExitBlock to normalize infiniteLoop: ");
1646 DEBUG(
dbgs() <<
"Old branch instr: " << *BranchMI <<
"\n";);
1648 Ctx.
emitError(
"Extra register needed to handle CFG");
1657 while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1658 && isUncondBranch(BranchMI)) {
1659 DEBUG(
dbgs() <<
"Removing uncond branch instr: " << *BranchMI);
1664 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1673 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1674 assert(BranchMI && isCondBranch(BranchMI));
1675 DEBUG(
dbgs() <<
"Removing unneeded cond branch instr: " << *BranchMI);
1677 SHOWNEWBLK(MBB1,
"Removing redundant successor");
1681 void AMDGPUCFGStructurizer::addDummyExitBlock(
1688 E = RetMBB.
end(); It !=
E; ++It) {
1699 SHOWNEWBLK(DummyExitBlk,
"DummyExitBlock: ");
1709 BlockInformation *&srcBlkInfo = BlockInfoMap[
MBB];
1711 srcBlkInfo =
new BlockInformation();
1712 srcBlkInfo->SccNum = SccNum;
1720 BlockInformation *&SrcBlkInfo = BlockInfoMap[
MBB];
1723 SrcBlkInfo =
new BlockInformation();
1725 SrcBlkInfo->IsRetired =
true;
1727 &&
"can't retire block yet");
1736 "AMDGPU CFG Structurizer",
false,
false)
1744 return new AMDGPUCFGStructurizer();
unsigned succ_size() const
void push_back(const T &Elt)
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
AMDGPU specific subclass of TargetSubtarget.
STATISTIC(NumFunctions,"Total number of functions")
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Interface definition for R600InstrInfo.
MachineBasicBlock * getMBB() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
A Module instance is used to store all the information related to an LLVM module. ...
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
LoopT * getParentLoop() const
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
SmallVector< MachineBasicBlock *, 4 > MBBVector
FunctionPass * createAMDGPUCFGStructurizerPass()
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
BlockT * getHeader() const
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
void initializeAMDGPUCFGStructurizerPass(PassRegistry &)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
std::vector< MachineBasicBlock * >::iterator succ_iterator
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
LLVM_NODISCARD bool empty() const
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer,"amdgpustructurizer","AMDGPU CFG Structurizer", false, false) INITIALIZE_PASS_END(AMDGPUCFGStructurizer
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
void emitError(unsigned LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
const HexagonRegisterInfo & getRegisterInfo() const
HexagonInstrInfo specifics.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
std::vector< MachineBasicBlock * >::iterator pred_iterator
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
reverse_iterator rbegin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
const MachineBasicBlock * getParent() const
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
This is an important class for using LLVM in a threaded context.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
const MachineOperand & getOperand(unsigned i) const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
#define SHOWNEWBLK(b, msg)
void setMBB(MachineBasicBlock *MBB)
Represent the analysis usage information of a pass.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
FunctionPass class - This class is used to implement most global optimizations.
bool isEmpty() const
isEmpty - Return true if there are no jump tables.
succ_iterator succ_begin()
pred_iterator pred_begin()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
CloneMachineInstr - Create a new MachineInstr which is a copy of the 'Orig' instruction, identical in all ways except the instruction has no parent, prev, or next.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
Representation of each machine instruction.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void push_back(MachineInstr *MI)
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator_range< df_iterator< T > > depth_first(const T &G)
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void push_back(MachineBasicBlock *MBB)
StringRef - Represent a constant reference to a string, i.e.
static uint32_t blk(uint32_t *Buf, int I)
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
bool isValid() const
Check for null.
std::vector< LoopT * >::const_iterator iterator
LoopInfoBase< MachineBasicBlock, MachineLoop >::iterator iterator
The iterator interface to the top-level loops in the current function.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.