37 #define DEBUG_TYPE "structcfg"
39 #define DEFAULT_VEC_SLOTS 8
49 STATISTIC(numSerialPatternMatch,
"CFGStructurizer number of serial pattern "
51 STATISTIC(numIfPatternMatch,
"CFGStructurizer number of if pattern "
53 STATISTIC(numLoopcontPatternMatch,
"CFGStructurizer number of loop-continue "
55 STATISTIC(numClonedBlock,
"CFGStructurizer cloned blocks");
56 STATISTIC(numClonedInstr,
"CFGStructurizer cloned instructions");
68 #define SHOWNEWINSTR(i) \
69 DEBUG(dbgs() << "New instr: " << *i << "\n");
71 #define SHOWNEWBLK(b, msg) \
73 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
77 #define SHOWBLK_DETAIL(b, msg) \
80 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
86 #define INVALIDSCCNUM -1
90 size_t sz = Src.
size();
91 for (
size_t i = 0; i < sz/2; ++i) {
93 Src[i] = Src[sz - i - 1];
109 class BlockInformation {
128 typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
129 typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
133 SinglePath_InPath = 1,
134 SinglePath_NotInPath = 2
139 AMDGPUCFGStructurizer() :
144 const char *getPassName()
const override {
145 return "AMDGPU Control Flow Graph structurizer Pass";
171 MLI = &getAnalysis<MachineLoopInfo>();
172 DEBUG(
dbgs() <<
"LoopInfo:\n"; PrintLoopinfo(*MLI););
173 MDT = &getAnalysis<MachineDominatorTree>();
175 PDT = &getAnalysis<MachinePostDominatorTree>();
192 void printOrderedBlocks()
const {
194 for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
195 iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
196 dbgs() <<
"BB" << (*iterBlk)->getNumber();
197 dbgs() <<
"(" << getSCCNum(*iterBlk) <<
"," << (*iterBlk)->size() <<
")";
198 if (i != 0 && i % 10 == 0) {
207 iterEnd = LoopInfo.
end(); iter != iterEnd; ++iter) {
208 (*iter)->print(
dbgs(), 0);
216 static unsigned getLoopDepth(
MachineLoop *LoopRep);
220 bool AllowSideEntry =
true)
const;
221 int countActiveBlock(MBBVector::const_iterator It,
222 MBBVector::const_iterator E)
const;
242 static int getBranchNzeroOpcode(
int OldOpcode);
243 static int getBranchZeroOpcode(
int OldOpcode);
244 static int getContinueNzeroOpcode(
int OldOpcode);
245 static int getContinueZeroOpcode(
int OldOpcode);
277 int loopendPatternMatch();
349 MBBInfoMap BlockInfoMap;
350 LoopLandInfoMap LLInfoMap;
351 std::map<MachineLoop *, bool> Visited;
357 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
358 if (It == BlockInfoMap.end())
360 return (*It).second->SccNum;
365 LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
366 if (It == LLInfoMap.end())
379 unsigned AMDGPUCFGStructurizer::getLoopDepth(
MachineLoop *LoopRep) {
384 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
385 if (It == BlockInfoMap.end())
387 return (*It).second->IsRetired;
392 while (LoopRep && LoopRep->
getHeader() == MBB) {
396 if (!isRetiredBlock(LoopLand))
402 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
404 bool AllowSideEntry)
const {
406 if (SrcMBB == DstMBB)
407 return SinglePath_InPath;
408 while (SrcMBB && SrcMBB->
succ_size() == 1) {
410 if (SrcMBB == DstMBB)
411 return SinglePath_InPath;
412 if (!AllowSideEntry && SrcMBB->
pred_size() > 1)
413 return Not_SinglePath;
416 return SinglePath_NotInPath;
417 return Not_SinglePath;
420 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
421 MBBVector::const_iterator E)
const {
424 if (!isRetiredBlock(*It))
432 unsigned BlockSizeThreshold = 30;
433 unsigned CloneInstrThreshold = 100;
434 bool MultiplePreds = MBB && (MBB->
pred_size() > 1);
438 unsigned BlkSize = MBB->
size();
439 return ((BlkSize > BlockSizeThreshold) &&
440 (BlkSize * (MBB->
pred_size() - 1) > CloneInstrThreshold));
443 void AMDGPUCFGStructurizer::reversePredicateSetter(
446 if (I->getOpcode() == AMDGPU::PRED_X) {
447 switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
504 void AMDGPUCFGStructurizer::insertCondBranchBefore(
529 int NewOpcode,
int RegNum) {
538 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(
int OldOpcode) {
540 case AMDGPU::JUMP_COND:
541 case AMDGPU::JUMP:
return AMDGPU::IF_PREDICATE_SET;
542 case AMDGPU::BRANCH_COND_i32:
543 case AMDGPU::BRANCH_COND_f32:
return AMDGPU::IF_LOGICALNZ_f32;
549 int AMDGPUCFGStructurizer::getBranchZeroOpcode(
int OldOpcode) {
551 case AMDGPU::JUMP_COND:
552 case AMDGPU::JUMP:
return AMDGPU::IF_PREDICATE_SET;
553 case AMDGPU::BRANCH_COND_i32:
554 case AMDGPU::BRANCH_COND_f32:
return AMDGPU::IF_LOGICALZ_f32;
560 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(
int OldOpcode) {
562 case AMDGPU::JUMP_COND:
563 case AMDGPU::JUMP:
return AMDGPU::CONTINUE_LOGICALNZ_i32;
569 int AMDGPUCFGStructurizer::getContinueZeroOpcode(
int OldOpcode) {
571 case AMDGPU::JUMP_COND:
572 case AMDGPU::JUMP:
return AMDGPU::CONTINUE_LOGICALZ_i32;
582 void AMDGPUCFGStructurizer::setTrueBranch(
MachineInstr *MI,
595 return (*It == TrueBranch) ? *Next : *It;
598 bool AMDGPUCFGStructurizer::isCondBranch(
MachineInstr *MI) {
600 case AMDGPU::JUMP_COND:
601 case AMDGPU::BRANCH_COND_i32:
602 case AMDGPU::BRANCH_COND_f32:
return true;
609 bool AMDGPUCFGStructurizer::isUncondBranch(
MachineInstr *MI) {
632 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
636 if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
641 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
648 if (isCondBranch(MI) || isUncondBranch(MI))
659 if (It != MBB->
rend()) {
669 if (It != MBB->
rend()) {
685 <<
" is return block without RETURN instr\n";);
692 iterEnd = SrcMBB->
succ_end(); It != iterEnd; ++It)
708 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
711 MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
712 if (BranchMI && isCondBranch(BranchMI) &&
713 getTrueBranch(BranchMI) == OldMBB)
714 setTrueBranch(BranchMI, NewBlk);
720 &&
"found a jump table");
728 if (Pre->getOpcode() == AMDGPU::CONTINUE
729 && It->getOpcode() == AMDGPU::ENDLOOP)
736 for (
unsigned i = 0; i < ContInstr.
size(); ++i)
737 ContInstr[i]->eraseFromParent();
747 bool AMDGPUCFGStructurizer::prepare() {
748 bool Changed =
false;
752 DEBUG(
dbgs() <<
"AMDGPUCFGStructurizer::prepare\n";);
754 orderBlocks(FuncRep);
760 E = MLI->end(); It != E; ++It) {
762 MBBVector ExitingMBBs;
765 if (ExitingMBBs.size() == 0) {
775 It = OrderedBlks.
begin(), E = OrderedBlks.end(); It != E; ++It) {
777 removeUnconditionalBranch(MBB);
778 removeRedundantConditionalBranch(MBB);
779 if (isReturnBlock(MBB)) {
785 if (RetBlks.
size() >= 2) {
786 addDummyExitBlock(RetBlks);
793 bool AMDGPUCFGStructurizer::run() {
796 DEBUG(
dbgs() <<
"AMDGPUCFGStructurizer::run\n");
800 ReverseVector(orderedBlks);
803 DEBUG(
dbgs() <<
"Ordered blocks:\n"; printOrderedBlocks(););
807 bool MakeProgress =
false;
808 int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
814 dbgs() <<
"numIter = " << NumIter
815 <<
", numRemaintedBlk = " << NumRemainedBlk <<
"\n";
837 SccNumBlk = NumRemainedBlk;
839 dbgs() <<
"start processing SCC" << getSCCNum(SccBeginMBB);
844 if (!isRetiredBlock(MBB))
849 bool ContNextScc =
true;
851 || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
854 int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
855 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
857 dbgs() <<
"Can't reduce SCC " << getSCCNum(MBB)
858 <<
", sccNumIter = " << SccNumIter;
859 dbgs() <<
"doesn't make any progress\n";
862 }
else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
863 SccNumBlk = sccRemainedNumBlk;
867 dbgs() <<
"repeat processing SCC" << getSCCNum(MBB)
868 <<
"sccNumIter = " << SccNumIter <<
'\n';
880 SccBeginMBB =
nullptr;
888 dbgs() <<
"Reduce to one block\n";
891 int NewnumRemainedBlk
892 = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
894 if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
896 NumRemainedBlk = NewnumRemainedBlk;
898 MakeProgress =
false;
900 dbgs() <<
"No progress\n";
904 }
while (!Finish && MakeProgress);
910 for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.
end();
912 if ((*It).second && (*It).second->IsRetired) {
913 assert(((*It).first)->getNumber() != -1);
915 dbgs() <<
"Erase BB" << ((*It).first)->getNumber() <<
"\n";
917 (*It).first->eraseFromParent();
921 BlockInfoMap.clear();
925 DEBUG(FuncRep->viewCFG());
939 const std::vector<MachineBasicBlock *> &SccNext = *It;
940 for (std::vector<MachineBasicBlock *>::const_iterator
941 blockIter = SccNext.begin(), blockEnd = SccNext.end();
942 blockIter != blockEnd; ++blockIter) {
945 recordSccnum(MBB, SccNum);
952 for (; It != E; ++It) {
954 SccNum = getSCCNum(MBB);
956 dbgs() <<
"unreachable block BB" << MBB->
getNumber() <<
"\n";
965 dbgs() <<
"Begin patternMatch BB" << MBB->
getNumber() <<
"\n";
968 while ((CurMatch = patternMatchGroup(MBB)) > 0)
969 NumMatch += CurMatch;
973 <<
", numMatch = " << NumMatch <<
"\n";
981 NumMatch += loopendPatternMatch();
982 NumMatch += serialPatternMatch(MBB);
983 NumMatch += ifPatternMatch(MBB);
993 if (childBlk->
pred_size() != 1 || isActiveLoophead(childBlk))
996 mergeSerialBlock(MBB, childBlk);
997 ++numSerialPatternMatch;
1005 if (hasBackEdge(MBB))
1007 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1011 assert(isCondBranch(BranchMI));
1015 NumMatch += serialPatternMatch(TrueMBB);
1016 NumMatch += ifPatternMatch(TrueMBB);
1018 NumMatch += serialPatternMatch(FalseMBB);
1019 NumMatch += ifPatternMatch(FalseMBB);
1038 reversePredicateSetter(MBB->
end());
1042 && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
1045 && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
1048 return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
1056 || (FalseMBB && FalseMBB->
pred_size() > 1))) {
1057 Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
1060 if (TrueMBB && TrueMBB->
pred_size() > 1) {
1061 TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
1065 if (FalseMBB && FalseMBB->
pred_size() > 1) {
1066 FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1070 mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1072 ++numIfPatternMatch;
1074 numClonedBlock += Cloned;
1076 return 1 + Cloned + NumMatch;
1079 int AMDGPUCFGStructurizer::loopendPatternMatch() {
1080 std::deque<MachineLoop *> NestedLoops;
1081 for (
auto &It: *MLI)
1083 NestedLoops.push_front(ML);
1085 if (NestedLoops.size() == 0)
1093 if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1095 DEBUG(
dbgs() <<
"Processing:\n"; ExaminedLoop->dump(););
1096 int NumBreak = mergeLoop(ExaminedLoop);
1104 int AMDGPUCFGStructurizer::mergeLoop(
MachineLoop *LoopRep) {
1106 MBBVector ExitingMBBs;
1108 assert(!ExitingMBBs.empty() &&
"Infinite Loop not supported");
1109 DEBUG(
dbgs() <<
"Loop has " << ExitingMBBs.size() <<
" exiting blocks\n";);
1114 for (
unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1115 ExitBlkSet.
insert(ExitBlks[i]);
1116 assert(ExitBlkSet.
size() == 1);
1118 assert(ExitBlk &&
"Loop has several exit block");
1119 MBBVector LatchBlks;
1121 InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
1122 PE = InvMBBTraits::child_end(LoopHeader);
1123 for (; PI != PE; PI++) {
1125 LatchBlks.push_back(*PI);
1128 for (
unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1129 mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1130 for (
unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1131 settleLoopcontBlock(LatchBlks[i], LoopHeader);
1135 Match += serialPatternMatch(LoopHeader);
1136 Match += ifPatternMatch(LoopHeader);
1137 }
while (Match > 0);
1138 mergeLooplandBlock(LoopHeader, ExitBlk);
1141 MLI->changeLoopFor(LoopHeader, ParentLoop);
1143 MLI->removeBlock(LoopHeader);
1144 Visited[LoopRep] =
true;
1148 int AMDGPUCFGStructurizer::loopcontPatternMatch(
MachineLoop *LoopRep,
1153 GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
1154 E = GTIM::child_end(LoopHeader);
1155 for (; It != E; ++It) {
1158 handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
1159 LoopHeader, LoopRep);
1166 E = ContMBB.
end(); It != E; ++It) {
1167 (*It)->removeSuccessor(LoopHeader);
1170 numLoopcontPatternMatch += NumCont;
1176 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1180 if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1184 dbgs() <<
"isLoopContBreakBlock yes src1 = BB"
1186 <<
" src2 = BB" << Src2MBB->
getNumber() <<
"\n";
1197 int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1200 dbgs() <<
"handleJumpintoIf swap trueBlk and FalseBlk" <<
"\n";
1202 Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1216 dbgs() <<
"handleJumpintoIfImp head = BB" << HeadMBB->
getNumber()
1219 <<
" false = BB" << FalseMBB->
getNumber() <<
"\n";
1227 if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1229 dbgs() <<
" working\n";
1232 Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1233 Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1235 numClonedBlock += Num;
1236 Num += serialPatternMatch(*HeadMBB->
succ_begin());
1237 Num += serialPatternMatch(*std::next(HeadMBB->
succ_begin()));
1238 Num += ifPatternMatch(HeadMBB);
1244 dbgs() <<
" not working\n";
1246 DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) :
nullptr;
1252 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1256 <<
" size = " << HeadMBB->
size();
1264 dbgs() <<
", true = BB" << TrueMBB->
getNumber() <<
" size = "
1265 << TrueMBB->
size() <<
" numPred = " << TrueMBB->
pred_size();
1273 dbgs() <<
", false = BB" << FalseMBB->
getNumber() <<
" size = "
1274 << FalseMBB->
size() <<
" numPred = " << FalseMBB->
pred_size();
1282 dbgs() <<
", land = BB" << LandMBB->
getNumber() <<
" size = "
1283 << LandMBB->
size() <<
" numPred = " << LandMBB->
pred_size();
1297 bool MigrateTrue =
false;
1298 bool MigrateFalse =
false;
1302 assert((!TrueMBB || TrueMBB->
succ_size() <= 1)
1303 && (!FalseMBB || FalseMBB->
succ_size() <= 1));
1305 if (TrueMBB == FalseMBB)
1308 MigrateTrue = needMigrateBlock(TrueMBB);
1309 MigrateFalse = needMigrateBlock(FalseMBB);
1311 if (!MigrateTrue && !MigrateFalse)
1317 if (!MigrateTrue && TrueMBB && TrueMBB->
pred_size() > 1)
1319 if (!MigrateFalse && FalseMBB && FalseMBB->
pred_size() > 1)
1320 MigrateFalse =
true;
1323 dbgs() <<
"before improveSimpleJumpintoIf: ";
1324 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1339 if (!MigrateTrue || !MigrateFalse) {
1410 bool LandBlkHasOtherPred = (LandBlk->
pred_size() > 2);
1415 if (LandBlkHasOtherPred) {
1417 unsigned CmpResReg =
1420 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
1428 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
1432 migrateInstruction(TrueMBB, LandBlk, I);
1438 insertInstrBefore(I, AMDGPU::ELSE);
1441 migrateInstruction(FalseMBB, LandBlk, I);
1448 if (LandBlkHasOtherPred) {
1450 insertInstrBefore(I, AMDGPU::ENDIF);
1454 PE = LandBlk->
pred_end(); PI != PE; ++PI) {
1456 if (MBB != TrueMBB && MBB != FalseMBB)
1461 dbgs() <<
"result from improveSimpleJumpintoIf: ";
1462 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1466 *LandMBBPtr = LandBlk;
1475 <<
" header = BB" << ContMBB->
getNumber() <<
"\n";
1476 dbgs() <<
"Trying to continue loop-depth = "
1477 << getLoopDepth(ContLoop)
1478 <<
" from loop-depth = " << getLoopDepth(ContingLoop) <<
"\n";);
1479 settleLoopcontBlock(ContingMBB, ContMBB);
1486 <<
" <= BB" << SrcMBB->
getNumber() <<
"\n";
1491 cloneSuccessorList(DstMBB, SrcMBB);
1493 removeSuccessor(SrcMBB);
1494 MLI->removeBlock(SrcMBB);
1495 retireBlock(SrcMBB);
1498 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(
MachineInstr *BranchMI,
1508 dbgs() <<
" } else ";
1514 dbgs() <<
"landBlock: ";
1535 insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1543 retireBlock(TrueMBB);
1544 MLI->removeBlock(TrueMBB);
1548 insertInstrBefore(I, AMDGPU::ELSE);
1552 if (LandMBB && FalseMBB->
succ_size() != 0)
1554 retireBlock(FalseMBB);
1555 MLI->removeBlock(FalseMBB);
1557 insertInstrBefore(I, AMDGPU::ENDIF);
1561 if (LandMBB && TrueMBB && FalseMBB)
1569 <<
" land = BB" << LandMBB->
getNumber() <<
"\n";);
1571 insertInstrBefore(DstBlk, AMDGPU::WHILELOOP,
DebugLoc());
1572 insertInstrEnd(DstBlk, AMDGPU::ENDLOOP,
DebugLoc());
1581 <<
" land = BB" << LandMBB->
getNumber() <<
"\n";);
1582 MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1583 assert(BranchMI && isCondBranch(BranchMI));
1584 DebugLoc DL = BranchMI->getDebugLoc();
1587 if (TrueBranch != LandMBB)
1588 reversePredicateSetter(I);
1589 insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
1590 insertInstrBefore(I, AMDGPU::BREAK);
1591 insertInstrBefore(I, AMDGPU::ENDIF);
1593 BranchMI->eraseFromParent();
1600 DEBUG(
dbgs() <<
"settleLoopcontBlock conting = BB"
1602 <<
", cont = BB" << ContMBB->
getNumber() <<
"\n";);
1604 MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1606 assert(isCondBranch(MI));
1609 int OldOpcode = MI->getOpcode();
1612 bool UseContinueLogical = ((&*ContingMBB->
rbegin()) == MI);
1614 if (!UseContinueLogical) {
1616 TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1617 getBranchZeroOpcode(OldOpcode);
1618 insertCondBranchBefore(I, BranchOpcode, DL);
1620 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
1621 insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
1624 TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1625 getContinueZeroOpcode(OldOpcode);
1626 insertCondBranchBefore(I, BranchOpcode, DL);
1629 MI->eraseFromParent();
1636 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
1637 getLastDebugLocInBB(ContingMBB));
1645 while (SrcMBB && SrcMBB != DstMBB) {
1648 SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1663 "succBlk is not a prececessor of curBlk");
1666 replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1673 cloneSuccessorList(CloneMBB, MBB);
1675 numClonedInstr += MBB->
size();
1678 dbgs() <<
"Cloned block: " <<
"BB"
1682 SHOWNEWBLK(CloneMBB,
"result of Cloned block: ");
1691 MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1694 dbgs() <<
"migrateInstruction don't see branch instr\n" ;
1696 SpliceEnd = SrcMBB->
end();
1699 dbgs() <<
"migrateInstruction see branch instr\n" ;
1702 SpliceEnd = BranchMI;
1705 dbgs() <<
"migrateInstruction before splice dstSize = " << DstMBB->
size()
1706 <<
"srcSize = " << SrcMBB->
size() <<
"\n";
1710 DstMBB->
splice(I, SrcMBB, SrcMBB->
begin(), SpliceEnd);
1713 dbgs() <<
"migrateInstruction after splice dstSize = " << DstMBB->
size()
1714 <<
"srcSize = " << SrcMBB->
size() <<
"\n";
1719 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(
MachineLoop* LoopRep) {
1723 if (!LoopHeader || !LoopLatch)
1725 MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1727 if (!BranchMI || !isUncondBranch(BranchMI))
1732 SHOWNEWBLK(DummyExitBlk,
"DummyExitBlock to normalize infiniteLoop: ");
1733 DEBUG(
dbgs() <<
"Old branch instr: " << *BranchMI <<
"\n";);
1735 Ctx.
emitError(
"Extra register needed to handle CFG");
1744 while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1745 && isUncondBranch(BranchMI)) {
1746 DEBUG(
dbgs() <<
"Removing uncond branch instr"; BranchMI->
dump(););
1751 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1760 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1761 assert(BranchMI && isCondBranch(BranchMI));
1762 DEBUG(
dbgs() <<
"Removing unneeded cond branch instr"; BranchMI->
dump(););
1764 SHOWNEWBLK(MBB1,
"Removing redundant successor");
1768 void AMDGPUCFGStructurizer::addDummyExitBlock(
1775 E = RetMBB.
end(); It != E; ++It) {
1786 SHOWNEWBLK(DummyExitBlk,
"DummyExitBlock: ");
1796 BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1798 srcBlkInfo =
new BlockInformation();
1799 srcBlkInfo->SccNum = SccNum;
1807 BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1810 SrcBlkInfo =
new BlockInformation();
1812 SrcBlkInfo->IsRetired =
true;
1814 &&
"can't retire block yet");
1817 void AMDGPUCFGStructurizer::setLoopLandBlock(
MachineLoop *loopRep,
1821 MBB = FuncRep->CreateMachineBasicBlock();
1823 SHOWNEWBLK(MBB,
"DummyLandingBlock for loop without break: ");
1827 dbgs() <<
"setLoopLandBlock loop-header = BB"
1829 <<
" landing-block = BB" << MBB->
getNumber() <<
"\n";
1837 if (PDT->dominates(MBB1, MBB2))
1839 if (PDT->dominates(MBB2, MBB1))
1847 return findNearestCommonPostDom(*MBB1->
succ_begin(), MBB2);
1849 return findNearestCommonPostDom(MBB1, *MBB2->
succ_begin());
1851 if (!Node1 || !Node2)
1856 if (PDT->dominates(Node1, Node2))
1865 AMDGPUCFGStructurizer::findNearestCommonPostDom(
1866 std::set<MachineBasicBlock *> &MBBs) {
1868 std::set<MachineBasicBlock *>::const_iterator It = MBBs.
begin();
1869 std::set<MachineBasicBlock *>::const_iterator E = MBBs.
end();
1870 for (CommonDom = *It; It != E && CommonDom; ++It) {
1872 if (MBB != CommonDom)
1873 CommonDom = findNearestCommonPostDom(MBB, CommonDom);
1877 dbgs() <<
"Common post dominator for exit blocks is ";
1879 dbgs() <<
"BB" << CommonDom->getNumber() <<
"\n";
1893 "AMDGPU CFG Structurizer",
false,
false)
1901 return new AMDGPUCFGStructurizer();
unsigned succ_size() const
void push_back(const T &Elt)
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
AMDGPU specific subclass of TargetSubtarget.
STATISTIC(NumFunctions,"Total number of functions")
Interface definition for R600InstrInfo.
MachineBasicBlock * getMBB() const
int getNumber() const
getNumber - MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a M...
A Module instance is used to store all the information related to an LLVM module. ...
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LoopT * getParentLoop() const
#define OPCODE_IS_NOT_ZERO_INT
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
FunctionPass * createAMDGPUCFGStructurizerPass()
BlockT * getHeader() const
MachineFunctionAnalysis - This class is a Pass that manages a MachineFunction object.
BlockT * getLoopLatch() const
getLoopLatch - If there is a single latch block for this loop, return it.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void initializeAMDGPUCFGStructurizerPass(PassRegistry &)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
std::vector< MachineBasicBlock * >::iterator succ_iterator
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
getExitingBlocks - Return all blocks inside the loop that have successors outside of the loop...
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer,"amdgpustructurizer","AMDGPU CFG Structurizer", false, false) INITIALIZE_PASS_END(AMDGPUCFGStructurizer
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
getExitBlocks - Return all of the successor blocks of this loop.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
void emitError(unsigned LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
const HexagonRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
#define OPCODE_IS_NOT_ZERO
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
Base class for the actual dominator tree node.
std::vector< MachineBasicBlock * >::iterator pred_iterator
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
reverse_iterator rbegin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bundle_iterator< MachineInstr, instr_iterator > iterator
This is an important class for using LLVM in a threaded context.
const MachineOperand & getOperand(unsigned i) const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
#define SHOWNEWBLK(b, msg)
void setMBB(MachineBasicBlock *MBB)
Represent the analysis usage information of a pass.
bool contains(const LoopT *L) const
contains - Return true if the specified loop is contained within in this loop.
FunctionPass class - This class is used to implement most global optimizations.
bool isEmpty() const
isEmpty - Return true if there are no jump tables.
succ_iterator succ_begin()
void removeSuccessor(MachineBasicBlock *succ)
removeSuccessor - Remove successor from the successors list of this MachineBasicBlock.
pred_iterator pred_begin()
DomTreeNodeBase< NodeT > * getIDom() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
CloneMachineInstr - Create a new MachineInstr which is a copy of the 'Orig' instruction, identical in all ways except the instruction has no parent, prev, or next.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
bool isSuccessor(const MachineBasicBlock *MBB) const
isSuccessor - Return true if the specified MBB is a successor of this block.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
Representation of each machine instruction.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void push_back(MachineInstr *MI)
iterator_range< df_iterator< T > > depth_first(const T &G)
unsigned getReg() const
getReg - Returns the register number.
virtual const TargetInstrInfo * getInstrInfo() const
std::reverse_iterator< iterator > reverse_iterator
void push_back(MachineBasicBlock *MBB)
BasicBlockListType::iterator iterator
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
std::vector< LoopT * >::const_iterator iterator
void print(raw_ostream &OS, SlotIndexes *=nullptr) const
LoopInfoBase< MachineBasicBlock, MachineLoop >::iterator iterator
iterator/begin/end - The interface to the top-level loops in the current function.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
unsigned getLoopDepth() const
getLoopDepth - Return the nesting level of this loop.
void addSuccessor(MachineBasicBlock *succ, uint32_t weight=0)
addSuccessor - Add succ as a successor of this MachineBasicBlock.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
#define OPCODE_IS_ZERO_INT