Go to the documentation of this file.
63 #define DEBUG_TYPE "simple-loop-unswitch"
67 STATISTIC(NumBranches,
"Number of branches unswitched");
68 STATISTIC(NumSwitches,
"Number of switches unswitched");
69 STATISTIC(NumGuards,
"Number of guards turned into branches for unswitching");
70 STATISTIC(NumTrivial,
"Number of unswitches that are trivial");
72 NumCostMultiplierSkipped,
73 "Number of unswitch candidates that had their cost multiplier skipped");
77 cl::desc(
"Forcibly enables non-trivial loop unswitching rather than "
78 "following the configuration passed into the pass."));
82 cl::desc(
"The cost threshold for unswitching a loop."));
86 cl::desc(
"Enable unswitch cost multiplier that prohibits exponential "
87 "explosion in nontrivial unswitch."));
90 cl::desc(
"Toplevel siblings divisor for cost multiplier."));
93 cl::desc(
"Number of unswitch candidates that are ignored when calculating "
97 cl::desc(
"If enabled, simple loop unswitching will also consider "
98 "llvm.experimental.guard intrinsics as unswitch candidates."));
100 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
102 cl::desc(
"If enabled, drop make.implicit metadata in unswitched implicit "
103 "null checks to save time analyzing if we can keep it."));
116 "Only need to walk the graph if root itself is not invariant.");
122 Worklist.push_back(&Root);
126 for (
Value *OpV :
I.operand_values()) {
128 if (isa<Constant>(OpV))
143 if (Visited.
insert(OpI).second)
144 Worklist.push_back(OpI);
146 }
while (!Worklist.empty());
153 assert(!isa<Constant>(Invariant) &&
"Why are we unswitching on a constant?");
158 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
171 auto *PN = dyn_cast<PHINode>(&
I);
196 Direction ? &NormalSucc : &UnswitchedSucc);
213 for (
auto i : seq<int>(0, PN.getNumOperands())) {
214 assert(PN.getIncomingBlock(
i) == &OldExitingBB &&
215 "Found incoming block different from unique predecessor!");
216 PN.setIncomingBlock(
i, &OldPH);
233 assert(&ExitBB != &UnswitchedBB &&
234 "Must have different loop exit and unswitched blocks!");
238 PN.getName() +
".split", InsertPt);
249 for (
int i = PN.getNumIncomingValues() - 1;
i >= 0; --
i) {
250 if (PN.getIncomingBlock(
i) != &OldExitingBB)
253 Value *Incoming = PN.getIncomingValue(
i);
256 PN.removeIncomingValue(
i);
258 NewPN->addIncoming(Incoming, &OldPH);
264 NewPN->addIncoming(&PN, &ExitBB);
283 Loop *NewParentL =
nullptr;
284 for (
auto *ExitBB : Exits)
286 if (!NewParentL || NewParentL->
contains(ExitL))
289 if (NewParentL == OldParentL)
295 "Can only hoist this loop up the nest!");
300 "Parent loop of this loop should contain this loop's preheader!");
315 for (
Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
319 return BB == &Preheader || L.contains(BB);
322 OldContainingL->getBlocksSet().erase(&Preheader);
324 OldContainingL->getBlocksSet().erase(
BB);
346 Loop *Current = TopMost;
376 LLVM_DEBUG(
dbgs() <<
" Trying to unswitch branch: " << BI <<
"\n");
383 bool FullUnswitch =
false;
389 if (
auto *CondInst = dyn_cast<Instruction>(BI.
getCondition()))
391 if (Invariants.
empty())
397 bool ExitDirection =
true;
398 int LoopExitSuccIdx = 0;
401 ExitDirection =
false;
407 auto *ContinueBB = BI.
getSuccessor(1 - LoopExitSuccIdx);
419 if (cast<Instruction>(BI.
getCondition())->getOpcode() != Instruction::Or)
422 if (cast<Instruction>(BI.
getCondition())->getOpcode() != Instruction::And)
428 dbgs() <<
" unswitching trivial invariant conditions for: " << BI
430 for (
Value *Invariant : Invariants) {
431 dbgs() <<
" " << *Invariant <<
" == true";
432 if (Invariant != Invariants.back())
463 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
465 "A branch's parent isn't a predecessor!");
466 UnswitchedBB = LoopExitBB;
469 SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
488 ParentBB->getInstList().push_back(BI.
clone());
502 "Must have an `or` of `i1`s for the condition!");
506 "Must have an `and` of `i1`s for the condition!");
508 *UnswitchedBB, *NewPH);
526 ParentBB->getTerminator()->eraseFromParent();
538 if (UnswitchedBB == LoopExitBB)
542 *ParentBB, *OldPH, FullUnswitch);
553 for (
Value *Invariant : Invariants)
600 Value *LoopCond =
SI.getCondition();
606 auto *ParentBB =
SI.getParent();
613 auto IsTriviallyUnswitchableExitBlock = [&](
BasicBlock &BBToCheck) {
624 auto *TI = BBToCheck.getTerminator();
625 bool isUnreachable = isa<UnreachableInst>(TI);
626 return !isUnreachable ||
627 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
631 for (
auto Case :
SI.cases())
632 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
633 ExitCaseIndices.push_back(Case.getCaseIndex());
637 if (IsTriviallyUnswitchableExitBlock(*
SI.getDefaultDest())) {
638 DefaultExitBB =
SI.getDefaultDest();
639 }
else if (ExitCaseIndices.empty())
654 SI.setDefaultDest(
nullptr);
657 if (!ExitL || ExitL->
contains(OuterL))
666 ExitCases.reserve(ExitCaseIndices.size());
671 auto CaseI =
SI.case_begin() +
Index;
674 if (!ExitL || ExitL->
contains(OuterL))
678 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(),
W);
693 if (
SI.getNumCases() > 0 &&
695 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
697 CommonSuccBB =
SI.case_begin()->getCaseSuccessor();
698 if (!DefaultExitBB) {
702 if (
SI.getNumCases() == 0)
703 CommonSuccBB =
SI.getDefaultDest();
704 else if (
SI.getDefaultDest() != CommonSuccBB)
705 CommonSuccBB =
nullptr;
731 UnswitchedExitBBs.
insert(DefaultExitBB);
739 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
744 for (
auto &ExitCase :
reverse(ExitCases)) {
752 if (UnswitchedExitBBs.
insert(ExitBB).second)
759 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
768 std::get<1>(ExitCase) = SplitExitBB;
773 for (
auto &ExitCase :
reverse(ExitCases)) {
775 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
777 NewSIW.
addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
788 for (
const auto &Case :
SI.cases())
791 }
else if (DefaultCaseWeight) {
793 uint64_t SW = *DefaultCaseWeight;
794 for (
const auto &Case :
SI.cases()) {
797 "case weight must be defined as default case weight is defined");
812 bool SkippedFirst = DefaultExitBB ==
nullptr;
813 for (
auto Case :
SI.cases()) {
815 "Non-common successor!");
827 }
else if (DefaultExitBB) {
829 "If we had no cases we'd have a common successor!");
834 auto LastCaseI = std::prev(
SI.case_end());
836 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
847 for (
auto *UnswitchedExitBB : UnswitchedExitBBs) {
848 DTUpdates.push_back({DT.
Delete, ParentBB, UnswitchedExitBB});
849 DTUpdates.push_back({DT.
Insert, OldPH, UnswitchedExitBB});
851 for (
auto SplitUnswitchedPair : SplitExitBBMap) {
852 DTUpdates.push_back({DT.
Delete, ParentBB, SplitUnswitchedPair.first});
853 DTUpdates.push_back({DT.
Insert, OldPH, SplitUnswitchedPair.second});
894 bool Changed =
false;
909 Visited.
insert(CurrentBB);
916 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
924 if (
auto *
SI = dyn_cast<SwitchInst>(CurrentTerm)) {
928 if (isa<Constant>(
SI->getCondition()))
943 if (!BI || BI->isConditional())
946 CurrentBB = BI->getSuccessor(0);
950 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
958 if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
972 if (BI->isConditional())
976 CurrentBB = BI->getSuccessor(0);
981 }
while (L.
contains(CurrentBB) && Visited.
insert(CurrentBB).second);
1028 NewBlocks.push_back(NewBB);
1029 VMap[OldBB] = NewBB;
1037 auto It = DominatingSucc.
find(
BB);
1038 return It != DominatingSucc.
end() && It->second != UnswitchedSuccBB;
1042 auto *ClonedPH = CloneBlock(LoopPH);
1045 for (
auto *LoopBB : L.
blocks())
1046 if (!SkipBlock(LoopBB))
1052 for (
auto *ExitBB : ExitBlocks) {
1053 if (SkipBlock(ExitBB))
1061 auto *MergeBB =
SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1066 MergeBB->takeName(ExitBB);
1067 ExitBB->setName(
Twine(MergeBB->getName()) +
".split");
1070 auto *ClonedExitBB = CloneBlock(ExitBB);
1071 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1072 "Exit block should have been split to have one successor!");
1073 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1074 "Cloned exit block has the wrong successor!");
1080 std::prev(ClonedExitBB->end())))) {
1087 (isa<PHINode>(
I) || isa<LandingPadInst>(
I) || isa<CatchPadInst>(
I)) &&
1088 "Bad instruction in exit block!");
1090 assert(VMap.
lookup(&
I) == &ClonedI &&
"Mismatch in the value map!");
1094 &*MergeBB->getFirstInsertionPt());
1095 I.replaceAllUsesWith(MergePN);
1096 MergePN->addIncoming(&
I, ExitBB);
1097 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1106 for (
auto *ClonedBB : NewBlocks)
1110 if (
auto *II = dyn_cast<IntrinsicInst>(&
I))
1111 if (II->getIntrinsicID() == Intrinsic::assume)
1117 for (
auto *LoopBB : L.
blocks())
1118 if (SkipBlock(LoopBB))
1120 if (
auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB)))
1121 for (
PHINode &PN : ClonedSuccBB->phis())
1122 PN.removeIncomingValue(LoopBB,
false);
1126 auto *ClonedParentBB = cast<BasicBlock>(VMap.
lookup(ParentBB));
1128 if (SuccBB == UnswitchedSuccBB)
1131 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB));
1135 ClonedSuccBB->removePredecessor(ClonedParentBB,
1141 auto *ClonedSuccBB = cast<BasicBlock>(VMap.
lookup(UnswitchedSuccBB));
1142 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1145 Value *ClonedConditionToErase =
nullptr;
1146 if (
auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1147 ClonedConditionToErase = BI->getCondition();
1148 else if (
auto *
SI = dyn_cast<SwitchInst>(ClonedTerminator))
1149 ClonedConditionToErase =
SI->getCondition();
1154 if (ClonedConditionToErase)
1161 for (
PHINode &PN : ClonedSuccBB->phis()) {
1165 for (
int i = PN.getNumOperands() - 1;
i >= 0; --
i) {
1166 if (PN.getIncomingBlock(
i) != ClonedParentBB)
1172 PN.removeIncomingValue(
i,
false);
1178 for (
auto *ClonedBB : NewBlocks) {
1180 if (SuccSet.
insert(SuccBB).second)
1196 auto AddClonedBlocksToLoop = [&](
Loop &OrigL,
Loop &ClonedL) {
1197 assert(ClonedL.getBlocks().empty() &&
"Must start with an empty loop!");
1200 auto *ClonedBB = cast<BasicBlock>(VMap.
lookup(
BB));
1201 ClonedL.addBlockEntry(ClonedBB);
1214 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1226 LoopsToClone.push_back({ClonedRootL, ChildL});
1228 Loop *ClonedParentL, *L;
1229 std::tie(ClonedParentL, L) = LoopsToClone.
pop_back_val();
1232 AddClonedBlocksToLoop(*L, *ClonedL);
1234 LoopsToClone.push_back({ClonedL, ChildL});
1235 }
while (!LoopsToClone.empty());
1256 Loop *ClonedL =
nullptr;
1261 auto *ClonedPH = cast<BasicBlock>(VMap.
lookup(OrigPH));
1262 auto *ClonedHeader = cast<BasicBlock>(VMap.
lookup(OrigHeader));
1268 Loop *ParentL =
nullptr;
1272 for (
auto *ExitBB : ExitBlocks)
1273 if (
auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.
lookup(ExitBB)))
1275 ExitLoopMap[ClonedExitBB] = ExitL;
1276 ClonedExitsInLoops.push_back(ClonedExitBB);
1277 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1282 "The computed parent loop should always contain (or be) the parent of "
1283 "the original loop.");
1291 if (
auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(
BB)))
1292 ClonedLoopBlocks.
insert(ClonedBB);
1303 if (Pred == ClonedPH)
1308 assert(ClonedLoopBlocks.
count(Pred) &&
"Found a predecessor of the loop "
1309 "header other than the preheader "
1310 "that is not part of the loop!");
1315 if (BlocksInClonedLoop.
insert(Pred).second && Pred != ClonedHeader)
1316 Worklist.push_back(Pred);
1322 if (!BlocksInClonedLoop.
empty()) {
1323 BlocksInClonedLoop.
insert(ClonedHeader);
1325 while (!Worklist.empty()) {
1328 "Didn't put block into the loop set!");
1336 if (ClonedLoopBlocks.
count(Pred) &&
1337 BlocksInClonedLoop.
insert(Pred).second)
1338 Worklist.push_back(Pred);
1348 NonChildClonedLoops.push_back(ClonedL);
1357 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(
BB));
1358 if (!ClonedBB || !BlocksInClonedLoop.
count(ClonedBB))
1371 PL->addBlockEntry(ClonedBB);
1378 for (
Loop *ChildL : OrigL) {
1379 auto *ClonedChildHeader =
1380 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1381 if (!ClonedChildHeader || !BlocksInClonedLoop.
count(ClonedChildHeader))
1387 for (
auto *ChildLoopBB : ChildL->blocks())
1389 cast<BasicBlock>(VMap.
lookup(ChildLoopBB))) &&
1390 "Child cloned loop has a header within the cloned outer "
1391 "loop but not all of its blocks!");
1406 if (BlocksInClonedLoop.
empty())
1407 UnloopedBlockSet.
insert(ClonedPH);
1408 for (
auto *ClonedBB : ClonedLoopBlocks)
1409 if (!BlocksInClonedLoop.
count(ClonedBB))
1410 UnloopedBlockSet.
insert(ClonedBB);
1416 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1418 return ExitLoopMap.
lookup(LHS)->getLoopDepth() <
1424 while (!UnloopedBlockSet.
empty() && !OrderedClonedExitsInLoops.empty()) {
1425 assert(Worklist.empty() &&
"Didn't clear worklist!");
1427 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1432 Worklist.push_back(ExitBB);
1442 if (!UnloopedBlockSet.
erase(PredBB)) {
1444 (BlocksInClonedLoop.
count(PredBB) || ExitLoopMap.
count(PredBB)) &&
1445 "Predecessor not mapped to a loop!");
1452 bool Inserted = ExitLoopMap.
insert({PredBB, ExitL}).second;
1454 assert(Inserted &&
"Should only visit an unlooped block once!");
1457 Worklist.push_back(PredBB);
1459 }
while (!Worklist.empty());
1468 for (
auto *
BB : llvm::concat<BasicBlock *const>(
1469 makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1471 OuterL->addBasicBlockToLoop(
BB, LI);
1474 for (
auto &BBAndL : ExitLoopMap) {
1475 auto *
BB = BBAndL.first;
1476 auto *OuterL = BBAndL.second;
1478 "Failed to put all blocks into outer loops!");
1485 for (
Loop *ChildL : OrigL) {
1486 auto *ClonedChildHeader =
1487 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1488 if (!ClonedChildHeader || BlocksInClonedLoop.
count(ClonedChildHeader))
1492 for (
auto *ChildLoopBB : ChildL->blocks())
1494 "Cloned a child loop header but not all of that loops blocks!");
1498 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1504 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1509 for (
auto &VMap : VMaps)
1510 if (
BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(
BB)))
1513 SuccBB->removePredecessor(ClonedBB);
1514 DeadBlocks.push_back(ClonedBB);
1526 BB->dropAllReferences();
1529 BB->eraseFromParent();
1545 while (!DeathCandidates.empty()) {
1550 DeathCandidates.push_back(SuccBB);
1567 for (
auto *
BB : DeadBlockSet)
1568 ParentL->getBlocksSet().erase(
BB);
1570 [&](
BasicBlock *
BB) { return DeadBlockSet.count(BB); });
1576 if (!DeadBlockSet.count(ChildL->getHeader()))
1579 assert(llvm::all_of(ChildL->blocks(),
1580 [&](BasicBlock *ChildBB) {
1581 return DeadBlockSet.count(ChildBB);
1583 "If the child loop header is dead all blocks in the child loop must "
1584 "be dead as well!");
1592 for (
auto *
BB : DeadBlockSet) {
1594 assert(!DT.getNode(
BB) &&
"Should already have cleared domtree!");
1595 LI.changeLoopFor(
BB,
nullptr);
1601 BB->dropAllReferences();
1606 for (
auto *
BB : DeadBlockSet)
1607 BB->eraseFromParent();
1640 assert(L.
contains(Pred) &&
"Found a predecessor of the loop header other "
1641 "than the preheader that is not part of the "
1647 if (LoopBlockSet.
insert(Pred).second && Pred != Header)
1648 Worklist.push_back(Pred);
1652 if (LoopBlockSet.
empty())
1653 return LoopBlockSet;
1656 while (!Worklist.empty()) {
1658 assert(LoopBlockSet.
count(
BB) &&
"Didn't put block into the loop set!");
1671 "Should not reach a loop *outside* this loop!");
1674 auto *InnerPH = InnerL->getLoopPreheader();
1675 assert(L.
contains(InnerPH) &&
"Cannot contain an inner loop block "
1676 "but not contain the inner loop "
1678 if (!LoopBlockSet.
insert(InnerPH).second)
1688 for (
auto *InnerBB : InnerL->blocks()) {
1689 if (InnerBB ==
BB) {
1691 "Block should already be in the set!");
1695 LoopBlockSet.
insert(InnerBB);
1700 Worklist.push_back(InnerPH);
1708 Worklist.push_back(Pred);
1711 assert(LoopBlockSet.
count(Header) &&
"Cannot fail to add the header!");
1715 return LoopBlockSet;
1739 Loop *ParentL =
nullptr;
1743 for (
auto *ExitBB : ExitBlocks)
1745 ExitLoops.push_back(ExitL);
1746 ExitsInLoops.push_back(ExitBB);
1747 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1759 if (!LoopBlockSet.empty() && L.
getParentLoop() != ParentL) {
1763 IL->getBlocksSet().erase(PH);
1765 IL->getBlocksSet().erase(
BB);
1767 return BB == PH || L.contains(BB);
1782 LoopBlockSet.empty()
1784 : std::stable_partition(
1785 Blocks.begin(), Blocks.end(),
1786 [&](
BasicBlock *
BB) { return LoopBlockSet.count(BB); });
1790 if (LoopBlockSet.empty())
1791 UnloopedBlocks.
insert(PH);
1794 for (
auto *
BB :
make_range(BlocksSplitI, Blocks.end()))
1796 Blocks.erase(BlocksSplitI, Blocks.end());
1808 auto RemoveUnloopedBlocksFromLoop =
1810 for (
auto *
BB : UnloopedBlocks)
1813 return UnloopedBlocks.count(BB);
1818 while (!UnloopedBlocks.
empty() && !ExitsInLoops.empty()) {
1819 assert(Worklist.empty() &&
"Didn't clear worklist!");
1820 assert(NewExitLoopBlocks.empty() &&
"Didn't clear loop set!");
1825 assert(ExitL.
contains(&L) &&
"Exit loop must contain the inner loop!");
1831 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->
getParentLoop())
1832 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1836 Worklist.push_back(ExitBB);
1846 if (!UnloopedBlocks.
erase(PredBB)) {
1847 assert((NewExitLoopBlocks.count(PredBB) ||
1849 "Predecessor not in a nested loop (or already visited)!");
1856 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1858 assert(Inserted &&
"Should only visit an unlooped block once!");
1861 Worklist.push_back(PredBB);
1863 }
while (!Worklist.empty());
1868 for (
auto *
BB : NewExitLoopBlocks)
1875 NewExitLoopBlocks.clear();
1881 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1882 for (
auto *
BB : UnloopedBlocks)
1891 auto SubLoopsSplitI =
1892 LoopBlockSet.empty()
1894 : std::stable_partition(
1895 SubLoops.begin(), SubLoops.end(), [&](
Loop *SubL) {
1896 return LoopBlockSet.count(SubL->getHeader());
1898 for (
auto *HoistedL :
make_range(SubLoopsSplitI, SubLoops.end())) {
1899 HoistedLoops.push_back(HoistedL);
1900 HoistedL->setParentLoop(
nullptr);
1910 if (
auto *NewParentL = LI.
getLoopFor(HoistedL->getLoopPreheader()))
1911 NewParentL->addChildLoop(HoistedL);
1915 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1918 if (Blocks.empty()) {
1919 assert(SubLoops.empty() &&
1920 "Failed to remove all subloops from the original loop!");
1935 template <
typename CallableT>
1938 DomWorklist.push_back(DT[
BB]);
1947 if (!Callable(
N->getBlock()))
1953 "Cannot visit a node twice when walking a tree!");
1954 DomWorklist.push_back(ChildN);
1956 }
while (!DomWorklist.empty());
1971 "Can only unswitch switches and conditional branch!");
1975 "Cannot have other invariants with full unswitching!");
1978 "Partial unswitching requires an instruction as the condition!");
1991 if (!FullUnswitch) {
1992 if (cast<Instruction>(BI->
getCondition())->getOpcode() != Instruction::Or) {
1995 "Only `or` and `and` instructions can combine invariants being "
2008 for (
auto Case :
SI->cases())
2009 if (Case.getCaseSuccessor() != RetainedSuccBB)
2010 UnswitchedSuccBBs.
insert(Case.getCaseSuccessor());
2012 assert(!UnswitchedSuccBBs.
count(RetainedSuccBB) &&
2013 "Should not unswitch the same successor we are retaining!");
2031 Loop *OuterExitL = &L;
2032 for (
auto *ExitBB : ExitBlocks) {
2034 if (!NewOuterExitL) {
2036 OuterExitL =
nullptr;
2039 if (NewOuterExitL != OuterExitL && NewOuterExitL->
contains(OuterExitL))
2040 OuterExitL = NewOuterExitL;
2059 for (
auto *SuccBB : llvm::concat<BasicBlock *const>(
2061 if (SuccBB->getUniquePredecessor() ||
2063 return PredBB == ParentBB || DT.
dominates(SuccBB, PredBB);
2066 DominatingSucc[
BB] = SuccBB;
2085 for (
auto *SuccBB : UnswitchedSuccBBs) {
2088 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2089 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
2094 if (TI.
getMetadata(LLVMContext::MD_make_implicit)) {
2098 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2105 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2112 SplitBB->getTerminator()->eraseFromParent();
2116 SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2120 ParentBB->getInstList().push_back(NewTI);
2129 assert(
SI &&
"Must either be a branch or switch!");
2132 assert(
SI->getDefaultDest() == RetainedSuccBB &&
2133 "Not retaining default successor!");
2134 SI->setDefaultDest(LoopPH);
2135 for (
auto &Case :
SI->cases())
2136 if (Case.getCaseSuccessor() == RetainedSuccBB)
2137 Case.setSuccessor(LoopPH);
2139 Case.setSuccessor(ClonedPHs.
find(Case.getCaseSuccessor())->second);
2145 DTUpdates.push_back(
2160 for (
auto &VMap : VMaps)
2175 assert(UnswitchedSuccBBs.size() == 1 &&
2176 "Only one possible unswitched block for a branch!");
2180 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2190 "Not retaining default successor!");
2191 for (
auto &Case : NewSI->
cases())
2192 Case.getCaseSuccessor()->removePredecessor(
2200 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2204 ParentBB->getTerminator()->eraseFromParent();
2208 BranchInst::Create(RetainedSuccBB, ParentBB);
2210 assert(BI &&
"Only branches have partial unswitching.");
2211 assert(UnswitchedSuccBBs.size() == 1 &&
2212 "Only one possible unswitched block for a branch!");
2217 *ClonedPH, *LoopPH);
2225 for (
auto &VMap : VMaps)
2245 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2274 assert(UnswitchedSuccBBs.size() == 1 &&
2275 "Only one possible unswitched block for a branch!");
2287 bool ReplaceUnswitched = FullUnswitch || (Invariants.
size() == 1);
2295 for (
Value *Invariant : Invariants)
2298 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2305 U.set(ContinueReplacement);
2306 else if (ReplaceUnswitched &&
2308 U.set(UnswitchedReplacement);
2324 auto UpdateLoop = [&](
Loop &UpdateL) {
2326 UpdateL.verifyLoop();
2327 for (
Loop *ChildL : UpdateL) {
2328 ChildL->verifyLoop();
2329 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2330 "Perturbed a child loop's LCSSA form!");
2350 for (
Loop *UpdatedL :
2351 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2352 UpdateLoop(*UpdatedL);
2353 if (UpdatedL->isOutermost())
2354 OuterExitL =
nullptr;
2359 OuterExitL =
nullptr;
2364 if (OuterExitL != &L)
2365 for (
Loop *OuterL = ParentL; OuterL != OuterExitL;
2367 UpdateLoop(*OuterL);
2379 for (
Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2380 if (UpdatedL->getParentLoop() == ParentL)
2381 SibLoops.push_back(UpdatedL);
2382 UnswitchCB(IsStillLoop, SibLoops);
2405 auto BBCostIt = BBCostMap.
find(
N.getBlock());
2406 if (BBCostIt == BBCostMap.
end())
2410 auto DTCostIt = DTCostMap.
find(&
N);
2411 if (DTCostIt != DTCostMap.
end())
2412 return DTCostIt->second;
2417 N.begin(),
N.end(), BBCostIt->second,
2419 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2421 bool Inserted = DTCostMap.
insert({&
N, Cost}).second;
2423 assert(Inserted &&
"Should not insert a node while visiting children!");
2463 if (Successors.
insert(Succ).second)
2464 DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2474 GuardedBlock->
setName(
"guarded");
2493 for (
auto *Succ : Successors)
2503 MSSAU->
moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2528 UnswitchCandidates) {
2541 NumCostMultiplierSkipped++;
2546 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2547 : std::distance(LI.
begin(), LI.
end()));
2550 int UnswitchedClones = 0;
2551 for (
auto Candidate : UnswitchCandidates) {
2554 bool SkipExitingSuccessors = DT.
dominates(CondBlock, Latch);
2556 if (!SkipExitingSuccessors)
2562 return !SkipExitingSuccessors || L.
contains(SuccBB);
2564 UnswitchedClones +=
Log2_32(NonExitingSuccessors);
2572 unsigned ClonesPower =
2576 int SiblingsMultiplier =
2587 CostMultiplier =
std::min(SiblingsMultiplier * (1 << ClonesPower),
2591 <<
" (siblings " << SiblingsMultiplier <<
" * clones "
2592 << (1 << ClonesPower) <<
")"
2593 <<
" for unswitch candidate: " << TI <<
"\n");
2594 return CostMultiplier;
2608 bool CollectGuards =
false;
2612 if (GuardDecl && !GuardDecl->use_empty())
2613 CollectGuards =
true;
2623 auto *
Cond = cast<IntrinsicInst>(&
I)->getArgOperand(0);
2626 UnswitchCandidates.push_back({&
I, {
Cond}});
2629 if (
auto *
SI = dyn_cast<SwitchInst>(
BB->getTerminator())) {
2632 if (!isa<Constant>(
SI->getCondition()) &&
2634 UnswitchCandidates.push_back({
SI, {
SI->getCondition()}});
2638 auto *BI = dyn_cast<BranchInst>(
BB->getTerminator());
2639 if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2640 BI->getSuccessor(0) == BI->getSuccessor(1))
2644 UnswitchCandidates.push_back({BI, {BI->getCondition()}});
2648 Instruction &CondI = *cast<Instruction>(BI->getCondition());
2649 if (CondI.
getOpcode() != Instruction::And &&
2655 if (Invariants.
empty())
2658 UnswitchCandidates.push_back({BI,
std::move(Invariants)});
2662 if (UnswitchCandidates.empty())
2673 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
2683 for (
auto *ExitBB : ExitBlocks)
2684 if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI())) {
2685 dbgs() <<
"Cannot unswitch because of cleanuppad in exit block\n";
2690 dbgs() <<
"Considering " << UnswitchCandidates.size()
2691 <<
" non-trivial loop invariant conditions for unswitching.\n");
2699 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
2709 ? TargetTransformInfo::TCK_CodeSize
2710 : TargetTransformInfo::TCK_SizeAndLatency;
2714 for (
auto &
I : *
BB) {
2718 if (
I.getType()->isTokenTy() &&
I.isUsedOutsideOfBlock(
BB))
2720 if (
auto *CB = dyn_cast<CallBase>(&
I))
2721 if (CB->isConvergent() || CB->cannotDuplicate())
2726 assert(Cost >= 0 &&
"Must not have negative costs!");
2728 assert(LoopCost >= 0 &&
"Must not have negative loop costs!");
2729 BBCostMap[
BB] = Cost;
2758 if (!Visited.
insert(SuccBB).second)
2765 if (!FullUnswitch) {
2766 auto &BI = cast<BranchInst>(TI);
2767 if (cast<Instruction>(BI.getCondition())->getOpcode() ==
2769 if (SuccBB == BI.getSuccessor(1))
2772 assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
2774 "Only `and` and `or` conditions can result in a partial "
2776 if (SuccBB == BI.getSuccessor(0))
2785 if (SuccBB->getUniquePredecessor() ||
2787 return PredBB == &
BB || DT.
dominates(SuccBB, PredBB);
2790 assert(Cost <= LoopCost &&
2791 "Non-duplicated cost should never exceed total loop cost!");
2800 int SuccessorsCount =
isGuard(&TI) ? 2 : Visited.
size();
2801 assert(SuccessorsCount > 1 &&
2802 "Cannot unswitch a condition without multiple distinct successors!");
2803 return (LoopCost - Cost) * (SuccessorsCount - 1);
2808 for (
auto &TerminatorAndInvariants : UnswitchCandidates) {
2813 TI, !BI || (Invariants.
size() == 1 &&
2818 int CostMultiplier =
2822 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
2823 CandidateCost *= CostMultiplier;
2825 <<
" (multiplier: " << CostMultiplier <<
")"
2826 <<
" for unswitch candidate: " << TI <<
"\n");
2829 <<
" for unswitch candidate: " << TI <<
"\n");
2832 if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2833 BestUnswitchTI = &TI;
2834 BestUnswitchCost = CandidateCost;
2835 BestUnswitchInvariants = Invariants;
2838 assert(BestUnswitchTI &&
"Failed to find loop unswitch candidate");
2842 << BestUnswitchCost <<
"\n");
2849 ExitBlocks, DT, LI, MSSAU);
2852 << BestUnswitchCost <<
") terminator: " << *BestUnswitchTI
2855 ExitBlocks, DT, LI, AC, UnswitchCB, SE, MSSAU);
2886 "Loops must be in LCSSA form before unswitching.");
2896 UnswitchCB(
true, {});
2935 LLVM_DEBUG(
dbgs() <<
"Unswitching loop in " <<
F.getName() <<
": " << L
2940 std::string LoopName = std::string(L.
getName());
2942 auto UnswitchCB = [&L, &U, &LoopName](
bool CurrentLoopValid,
2945 if (!NewLoops.empty())
2946 U.addSiblingLoops(NewLoops);
2950 if (CurrentLoopValid)
2951 U.revisitCurrentLoop();
2953 U.markLoopAsDeleted(L, LoopName);
2981 class SimpleLoopUnswitchLegacyPass :
public LoopPass {
2987 explicit SimpleLoopUnswitchLegacyPass(
bool NonTrivial =
false)
3014 LLVM_DEBUG(
dbgs() <<
"Unswitching loop in " <<
F.getName() <<
": " << *L
3017 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3018 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3019 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
3020 auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
3024 MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3028 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3029 auto *SE = SEWP ? &SEWP->getSE() :
nullptr;
3031 auto UnswitchCB = [&L, &LPM](
bool CurrentLoopValid,
3034 for (
auto *NewL : NewLoops)
3040 if (CurrentLoopValid)
3049 bool Changed =
unswitchLoop(*L, DT, LI, AC,
TTI, NonTrivial, UnswitchCB, SE,
3064 "Simple unswitch loops",
false,
false)
3075 return new SimpleLoopUnswitchLegacyPass(NonTrivial);
A set of analyses that are preserved following a run of a transformation pass.
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
pred_range predecessors(BasicBlock *BB)
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
static StringRef getName(Value *V)
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
This class represents lattice values for constants.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Vector Rotate Left Mask Mask Insert
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
const Function * getParent() const
Return the enclosing method, or null if none.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
CaseWeightOpt getSuccessorWeight(unsigned idx)
Represents a single loop in the control flow graph.
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
size_type size() const
Determine the number of elements in the SetVector.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
The main scalar evolution driver.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, TargetTransformInfo &TTI, function_ref< void(bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::begin iterator begin()
bool dominates(const Value *Def, const Use &U) const
Return true if value Def dominates use U, in the sense that Def is available at U,...
static int CalculateUnswitchCostMultiplier(Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT, ArrayRef< std::pair< Instruction *, TinyPtrVector< Value * >>> UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
The legacy pass manager's analysis pass to compute loop information.
static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
static constexpr UpdateKind Insert
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, TargetTransformInfo &TTI, bool NonTrivial, function_ref< void(bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch control flow predicated on loop invariant conditions.
cl::opt< bool > EnableMSSALoopDependency
Enables memory ssa as a dependency for loop passes.
LLVM_NODISCARD T pop_back_val()
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
BasicBlock * getDefaultDest() const
succ_range successors(Instruction *I)
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&... args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void swapSuccessors()
Swap the successors of this branch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
LLVM Basic Block Representation.
constexpr const T * getPointer() const
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
constexpr bool hasValue() const
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static void replaceLoopInvariantUses(Loop &L, Value *Invariant, Constant &Replacement)
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
This is the shared class of boolean and integer constants.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
Legacy analysis pass which computes MemorySSA.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
iterator begin()
Instruction iterator methods.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
Represent the analysis usage information of a pass.
void push_back(EltTy NewVal)
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator_range< block_iterator > blocks() const
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
iterator_range< use_iterator > uses()
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
Legacy analysis pass which computes a DominatorTree.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
STATISTIC(NumFunctions, "Total number of functions")
void registerAssumption(CallInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
void setName(const Twine &Name)
Change the name of the value.
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
StringRef getName() const
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy >> VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Direction
An enum for the direction of the loop.
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LoopT * AllocateLoop(ArgsTy &&... Args)
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root, LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
Value * getCondition() const
An efficient, type-erasing, non-owning reference to a callable.
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc)
Insert code to test a set of loop invariant values, and conditionally branch on them.
This is an important base class in LLVM.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
void setDefaultDest(BasicBlock *DefaultCase)
Module * getParent()
Get the module that this global value is contained inside of...
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Encapsulates MemorySSA, including all data associated with memory accesses.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
initializer< Ty > init(const Ty &Val)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find iterator find(const_arg_type_t< KeyT > Val)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", "Simple unswitch loops", false, false) INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
unsigned getLoopDepth() const
Return the nesting level of this loop.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void forgetTopmostLoop(const Loop *L)
void verifyMemorySSA() const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
An analysis that produces MemorySSA for a function.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool insert(const value_type &X)
Insert a new element into the SetVector.
static Loop * getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI)
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops)
Rebuild a loop after unswitching removes some subset of blocks and edges.
std::vector< LoopT * > & getSubLoopsVector()
An immutable pass that tracks lazily created AssumptionCache objects.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< MachineOperand, 4 > Cond
A cache of @llvm.assume calls within a function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
ConstantIntT * getCaseValue() const
Resolves case value for current case.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
LLVMContext & getContext() const
All values hold a context through their type.
simple loop Simple unswitch loops
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
bool pred_empty(const BasicBlock *BB)
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void setArgOperand(unsigned i, Value *v)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
static ConstantInt * getFalse(LLVMContext &Context)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
const Instruction & front() const
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
void stable_sort(R &&Range)
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
static ConstantInt * getTrue(LLVMContext &Context)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
void markLoopAsDeleted(Loop &L)
TargetTransformInfo & TTI
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end iterator end()
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
SmallPtrSetImpl< const BlockT * > & getBlocksSet()
Return a direct, mutable handle to the blocks set so that we can mutate it efficiently.
BlockT * getHeader() const
void sort(IteratorTy Start, IteratorTy End)
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
SymbolTableList< Instruction >::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
A wrapper class for inspecting calls to intrinsic functions.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
const InstListType & getInstList() const
Return the underlying instruction list container.
Pass interface - Implemented by all 'passes'.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Value * getArgOperand(unsigned i) const
LLVM_NODISCARD bool empty() const
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
const BasicBlock * getParent() const
bool isSafeToClone() const
Return true if the loop body is safe to clone in practice.
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
static Constant * getTrue(Type *Ty)
For a boolean type or a vector of boolean type, return true or a vector with every element true.
size_t size() const
size - Get the array size.
Align max(MaybeAlign Lhs, Align Rhs)
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
A SetVector that performs no allocations if smaller than a certain size.
A container for analyses that lazily runs them and caches their results.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
AnalysisUsage & addRequired()
bool VerifyMemorySSA
Enables verification of MemorySSA.
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Conditional or Unconditional Branch instruction.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void reserve(size_type N)
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM Value Representation.
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
static constexpr UpdateKind Delete
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A Use represents the edge between a Value definition and its users.
reference emplace_back(ArgTypes &&... Args)
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Build the cloned blocks for an unswitched copy of the given loop.