69#define DEBUG_TYPE "simple-loop-unswitch"
74STATISTIC(NumBranches,
"Number of branches unswitched");
75STATISTIC(NumSwitches,
"Number of switches unswitched");
76STATISTIC(NumSelects,
"Number of selects turned into branches for unswitching");
77STATISTIC(NumGuards,
"Number of guards turned into branches for unswitching");
78STATISTIC(NumTrivial,
"Number of unswitches that are trivial");
80 NumCostMultiplierSkipped,
81 "Number of unswitch candidates that had their cost multiplier skipped");
83 "Number of invariant conditions injected and unswitched");
87 cl::desc(
"Forcibly enables non-trivial loop unswitching rather than "
88 "following the configuration passed into the pass."));
92 cl::desc(
"The cost threshold for unswitching a loop."));
96 cl::desc(
"Enable unswitch cost multiplier that prohibits exponential "
97 "explosion in nontrivial unswitch."));
100 cl::desc(
"Toplevel siblings divisor for cost multiplier."));
103 cl::desc(
"Number of unswitch candidates that are ignored when calculating "
104 "cost multiplier."));
107 cl::desc(
"If enabled, simple loop unswitching will also consider "
108 "llvm.experimental.guard intrinsics as unswitch candidates."));
110 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
112 cl::desc(
"If enabled, drop make.implicit metadata in unswitched implicit "
113 "null checks to save time analyzing if we can keep it."));
116 cl::desc(
"Max number of memory uses to explore during "
117 "partial unswitching analysis"),
121 cl::desc(
"If enabled, the freeze instruction will be added to condition "
122 "of loop unswitch to prevent miscompilation."));
125 "simple-loop-unswitch-inject-invariant-conditions",
cl::Hidden,
126 cl::desc(
"Whether we should inject new invariants and unswitch them to "
127 "eliminate some existing (non-invariant) conditions."),
131 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
133 "unswitch on them to eliminate branches that are "
134 "not-taken 1/<this option> times or less."),
145 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
148struct InjectedInvariant {
156 : Pred(Pred),
LHS(
LHS),
RHS(
RHS), InLoopSucc(InLoopSucc) {}
159struct NonTrivialUnswitchCandidate {
162 std::optional<InstructionCost>
Cost;
163 std::optional<InjectedInvariant> PendingInjection;
164 NonTrivialUnswitchCandidate(
166 std::optional<InstructionCost>
Cost = std::nullopt,
167 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
168 : TI(TI), Invariants(Invariants),
Cost(
Cost),
169 PendingInjection(PendingInjection) {};
171 bool hasPendingInjection()
const {
return PendingInjection.has_value(); }
195 assert(!L.isLoopInvariant(&Root) &&
196 "Only need to walk the graph if root itself is not invariant.");
209 for (
Value *OpV :
I.operand_values()) {
211 if (isa<Constant>(OpV))
215 if (L.isLoopInvariant(OpV)) {
226 if (Visited.
insert(OpI).second)
230 }
while (!Worklist.
empty());
237 assert(!isa<Constant>(Invariant) &&
"Why are we unswitching on a constant?");
242 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
245 if (UserI && L.contains(UserI))
256 auto *PN = dyn_cast<PHINode>(&
I);
263 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
279 for (
Value *Inv : Invariants) {
288 Direction ? &NormalSucc : &UnswitchedSucc);
297 for (
auto *Val :
reverse(ToDuplicate)) {
311 auto *DefiningAccess = MemUse->getDefiningAccess();
313 while (L.contains(DefiningAccess->getBlock())) {
316 if (
auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
318 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
320 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
331 Direction ? &NormalSucc : &UnswitchedSucc);
348 for (
auto i : seq<int>(0, PN.getNumOperands())) {
349 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
350 "Found incoming block different from unique predecessor!");
351 PN.setIncomingBlock(i, &OldPH);
368 assert(&ExitBB != &UnswitchedBB &&
369 "Must have different loop exit and unswitched blocks!");
373 PN.getName() +
".split");
374 NewPN->insertBefore(InsertPt);
385 for (
int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
386 if (PN.getIncomingBlock(i) != &OldExitingBB)
392 PN.removeIncomingValue(i);
394 NewPN->addIncoming(
Incoming, &OldPH);
399 PN.replaceAllUsesWith(NewPN);
400 NewPN->addIncoming(&PN, &ExitBB);
413 Loop *OldParentL = L.getParentLoop();
418 L.getExitBlocks(Exits);
419 Loop *NewParentL =
nullptr;
420 for (
auto *ExitBB : Exits)
422 if (!NewParentL || NewParentL->
contains(ExitL))
425 if (NewParentL == OldParentL)
431 "Can only hoist this loop up the nest!");
436 "Parent loop of this loop should contain this loop's preheader!");
451 for (
Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
455 return BB == &Preheader || L.contains(BB);
458 OldContainingL->getBlocksSet().erase(&Preheader);
460 OldContainingL->getBlocksSet().erase(BB);
483 Loop *Current = TopMost;
513 LLVM_DEBUG(
dbgs() <<
" Trying to unswitch branch: " << BI <<
"\n");
520 bool FullUnswitch =
false;
523 if (L.isLoopInvariant(
Cond)) {
527 if (
auto *CondInst = dyn_cast<Instruction>(
Cond))
529 if (Invariants.
empty()) {
536 bool ExitDirection =
true;
537 int LoopExitSuccIdx = 0;
539 if (L.contains(LoopExitBB)) {
540 ExitDirection =
false;
543 if (L.contains(LoopExitBB)) {
548 auto *ContinueBB = BI.
getSuccessor(1 - LoopExitSuccIdx);
551 LLVM_DEBUG(
dbgs() <<
" Loop exit PHI's aren't loop-invariant!\n");
564 "non-full unswitch!\n");
570 dbgs() <<
" unswitching trivial invariant conditions for: " << BI
572 for (
Value *Invariant : Invariants) {
573 dbgs() <<
" " << *Invariant <<
" == true";
574 if (Invariant != Invariants.back())
606 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
608 "A branch's parent isn't a predecessor!");
609 UnswitchedBB = LoopExitBB;
612 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU,
"",
false);
645 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
649 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
652 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
663 Updates.
push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
675 Term->eraseFromParent();
685 if (UnswitchedBB == LoopExitBB)
689 *ParentBB, *OldPH, FullUnswitch);
700 for (
Value *Invariant : Invariants)
746 LLVM_DEBUG(
dbgs() <<
" Trying to unswitch switch: " << SI <<
"\n");
747 Value *LoopCond = SI.getCondition();
750 if (!L.isLoopInvariant(LoopCond))
753 auto *ParentBB = SI.getParent();
760 auto IsTriviallyUnswitchableExitBlock = [&](
BasicBlock &BBToCheck) {
762 if (L.contains(&BBToCheck))
771 auto *TI = BBToCheck.getTerminator();
772 bool isUnreachable = isa<UnreachableInst>(TI);
773 return !isUnreachable ||
774 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
778 for (
auto Case : SI.cases())
779 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
780 ExitCaseIndices.
push_back(Case.getCaseIndex());
784 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
785 DefaultExitBB = SI.getDefaultDest();
786 }
else if (ExitCaseIndices.
empty())
801 if (!ExitL || ExitL->
contains(OuterL))
804 for (
unsigned Index : ExitCaseIndices) {
805 auto CaseI = SI.case_begin() +
Index;
808 if (!ExitL || ExitL->
contains(OuterL))
822 SI.setDefaultDest(
nullptr);
830 ExitCases.reserve(ExitCaseIndices.
size());
835 auto CaseI = SI.case_begin() +
Index;
838 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
846 if (SI.getNumCases() > 0 &&
848 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
850 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
851 if (!DefaultExitBB) {
855 if (SI.getNumCases() == 0)
856 CommonSuccBB = SI.getDefaultDest();
857 else if (SI.getDefaultDest() != CommonSuccBB)
858 CommonSuccBB =
nullptr;
887 UnswitchedExitBBs.
insert(DefaultExitBB);
895 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
900 for (
auto &ExitCase :
reverse(ExitCases)) {
908 if (UnswitchedExitBBs.
insert(ExitBB).second)
915 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
924 std::get<1>(ExitCase) = SplitExitBB;
929 for (
auto &ExitCase :
reverse(ExitCases)) {
931 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
933 NewSIW.
addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
944 for (
const auto &Case : SI.cases())
947 }
else if (DefaultCaseWeight) {
950 for (
const auto &Case : SI.cases()) {
953 "case weight must be defined as default case weight is defined");
968 bool SkippedFirst = DefaultExitBB ==
nullptr;
969 for (
auto Case : SI.cases()) {
971 "Non-common successor!");
984 }
else if (DefaultExitBB) {
985 assert(SI.getNumCases() > 0 &&
986 "If we had no cases we'd have a common successor!");
991 auto LastCaseI = std::prev(SI.case_end());
993 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
1004 for (
auto *UnswitchedExitBB : UnswitchedExitBBs) {
1008 for (
auto SplitUnswitchedPair : SplitExitBBMap) {
1009 DTUpdates.
push_back({DT.
Delete, ParentBB, SplitUnswitchedPair.first});
1021 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
1051 bool Changed =
false;
1066 Visited.
insert(CurrentBB);
1073 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1081 if (
auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1085 if (isa<Constant>(SI->getCondition()))
1099 auto *BI = dyn_cast<BranchInst>(CurrentBB->
getTerminator());
1100 if (!BI || BI->isConditional())
1103 CurrentBB = BI->getSuccessor(0);
1107 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1115 if (!BI->isConditional() ||
1130 if (BI->isConditional())
1134 CurrentBB = BI->getSuccessor(0);
1139 }
while (L.contains(CurrentBB) && Visited.
insert(CurrentBB).second);
1177 NewBlocks.
reserve(L.getNumBlocks() + ExitBlocks.
size());
1188 VMap[OldBB] = NewBB;
1196 auto It = DominatingSucc.
find(BB);
1197 return It != DominatingSucc.
end() && It->second != UnswitchedSuccBB;
1201 auto *ClonedPH = CloneBlock(LoopPH);
1204 for (
auto *LoopBB : L.blocks())
1205 if (!SkipBlock(LoopBB))
1211 for (
auto *ExitBB : ExitBlocks) {
1212 if (SkipBlock(ExitBB))
1220 auto *MergeBB =
SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1225 MergeBB->takeName(ExitBB);
1226 ExitBB->setName(
Twine(MergeBB->getName()) +
".split");
1229 auto *ClonedExitBB = CloneBlock(ExitBB);
1230 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1231 "Exit block should have been split to have one successor!");
1232 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1233 "Cloned exit block has the wrong successor!");
1239 std::prev(ClonedExitBB->end())))) {
1246 (isa<PHINode>(
I) || isa<LandingPadInst>(
I) || isa<CatchPadInst>(
I)) &&
1247 "Bad instruction in exit block!");
1249 assert(VMap.
lookup(&
I) == &ClonedI &&
"Mismatch in the value map!");
1252 if (SE && isa<PHINode>(
I))
1259 MergePN->insertBefore(InsertPt);
1260 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1261 I.replaceAllUsesWith(MergePN);
1262 MergePN->addIncoming(&
I, ExitBB);
1263 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1272 Module *M = ClonedPH->getParent()->getParent();
1273 for (
auto *ClonedBB : NewBlocks)
1279 if (
auto *
II = dyn_cast<AssumeInst>(&
I))
1285 for (
auto *LoopBB : L.blocks())
1286 if (SkipBlock(LoopBB))
1288 if (
auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB)))
1289 for (
PHINode &PN : ClonedSuccBB->phis())
1290 PN.removeIncomingValue(LoopBB,
false);
1294 auto *ClonedParentBB = cast<BasicBlock>(VMap.
lookup(ParentBB));
1296 if (SuccBB == UnswitchedSuccBB)
1299 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB));
1303 ClonedSuccBB->removePredecessor(ClonedParentBB,
1309 auto *ClonedSuccBB = cast<BasicBlock>(VMap.
lookup(UnswitchedSuccBB));
1310 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1313 Value *ClonedConditionToErase =
nullptr;
1314 if (
auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1315 ClonedConditionToErase = BI->getCondition();
1316 else if (
auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1317 ClonedConditionToErase = SI->getCondition();
1323 if (ClonedConditionToErase)
1330 for (
PHINode &PN : ClonedSuccBB->phis()) {
1334 for (
int i = PN.getNumOperands() - 1; i >= 0; --i) {
1335 if (PN.getIncomingBlock(i) != ClonedParentBB)
1341 PN.removeIncomingValue(i,
false);
1347 for (
auto *ClonedBB : NewBlocks) {
1349 if (SuccSet.
insert(SuccBB).second)
1350 DTUpdates.
push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1365 auto AddClonedBlocksToLoop = [&](
Loop &OrigL,
Loop &ClonedL) {
1366 assert(ClonedL.getBlocks().empty() &&
"Must start with an empty loop!");
1368 for (
auto *BB : OrigL.
blocks()) {
1369 auto *ClonedBB = cast<BasicBlock>(VMap.
lookup(BB));
1370 ClonedL.addBlockEntry(ClonedBB);
1383 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1395 LoopsToClone.
push_back({ClonedRootL, ChildL});
1397 Loop *ClonedParentL, *L;
1398 std::tie(ClonedParentL, L) = LoopsToClone.
pop_back_val();
1401 AddClonedBlocksToLoop(*L, *ClonedL);
1403 LoopsToClone.
push_back({ClonedL, ChildL});
1404 }
while (!LoopsToClone.
empty());
1425 Loop *ClonedL =
nullptr;
1430 auto *ClonedPH = cast<BasicBlock>(VMap.
lookup(OrigPH));
1431 auto *ClonedHeader = cast<BasicBlock>(VMap.
lookup(OrigHeader));
1437 Loop *ParentL =
nullptr;
1441 for (
auto *ExitBB : ExitBlocks)
1442 if (
auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.
lookup(ExitBB)))
1444 ExitLoopMap[ClonedExitBB] = ExitL;
1445 ClonedExitsInLoops.
push_back(ClonedExitBB);
1446 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1451 "The computed parent loop should always contain (or be) the parent of "
1452 "the original loop.");
1459 for (
auto *BB : OrigL.
blocks())
1460 if (
auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB)))
1461 ClonedLoopBlocks.
insert(ClonedBB);
1472 if (Pred == ClonedPH)
1477 assert(ClonedLoopBlocks.
count(Pred) &&
"Found a predecessor of the loop "
1478 "header other than the preheader "
1479 "that is not part of the loop!");
1484 if (BlocksInClonedLoop.
insert(Pred).second && Pred != ClonedHeader)
1491 if (!BlocksInClonedLoop.
empty()) {
1492 BlocksInClonedLoop.
insert(ClonedHeader);
1494 while (!Worklist.
empty()) {
1497 "Didn't put block into the loop set!");
1505 if (ClonedLoopBlocks.
count(Pred) &&
1506 BlocksInClonedLoop.
insert(Pred).second)
1525 for (
auto *BB : OrigL.
blocks()) {
1526 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB));
1527 if (!ClonedBB || !BlocksInClonedLoop.
count(ClonedBB))
1539 for (
Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1540 PL->addBlockEntry(ClonedBB);
1547 for (
Loop *ChildL : OrigL) {
1548 auto *ClonedChildHeader =
1549 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1550 if (!ClonedChildHeader || !BlocksInClonedLoop.
count(ClonedChildHeader))
1556 for (
auto *ChildLoopBB : ChildL->blocks())
1558 cast<BasicBlock>(VMap.
lookup(ChildLoopBB))) &&
1559 "Child cloned loop has a header within the cloned outer "
1560 "loop but not all of its blocks!");
1575 if (BlocksInClonedLoop.
empty())
1576 UnloopedBlockSet.
insert(ClonedPH);
1577 for (
auto *ClonedBB : ClonedLoopBlocks)
1578 if (!BlocksInClonedLoop.
count(ClonedBB))
1579 UnloopedBlockSet.
insert(ClonedBB);
1585 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1587 return ExitLoopMap.
lookup(
LHS)->getLoopDepth() <
1588 ExitLoopMap.
lookup(
RHS)->getLoopDepth();
1593 while (!UnloopedBlockSet.
empty() && !OrderedClonedExitsInLoops.empty()) {
1594 assert(Worklist.
empty() &&
"Didn't clear worklist!");
1596 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1611 if (!UnloopedBlockSet.
erase(PredBB)) {
1613 (BlocksInClonedLoop.
count(PredBB) || ExitLoopMap.
count(PredBB)) &&
1614 "Predecessor not mapped to a loop!");
1621 bool Inserted = ExitLoopMap.
insert({PredBB, ExitL}).second;
1623 assert(Inserted &&
"Should only visit an unlooped block once!");
1628 }
while (!Worklist.
empty());
1637 for (
auto *BB : llvm::concat<BasicBlock *const>(
1638 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1640 OuterL->addBasicBlockToLoop(BB, LI);
1643 for (
auto &BBAndL : ExitLoopMap) {
1644 auto *BB = BBAndL.first;
1645 auto *OuterL = BBAndL.second;
1647 "Failed to put all blocks into outer loops!");
1654 for (
Loop *ChildL : OrigL) {
1655 auto *ClonedChildHeader =
1656 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1657 if (!ClonedChildHeader || BlocksInClonedLoop.
count(ClonedChildHeader))
1661 for (
auto *ChildLoopBB : ChildL->blocks())
1663 "Cloned a child loop header but not all of that loops blocks!");
1667 *ChildL, ExitLoopMap.
lookup(ClonedChildHeader), VMap, LI));
1673 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1677 for (
BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1678 for (
const auto &VMap : VMaps)
1679 if (
BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1682 SuccBB->removePredecessor(ClonedBB);
1695 BB->dropAllReferences();
1698 BB->eraseFromParent();
1715 DeathCandidates.
append(L.blocks().begin(), L.blocks().end());
1716 while (!DeathCandidates.
empty()) {
1720 SuccBB->removePredecessor(BB);
1737 for (
Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1738 for (
auto *BB : DeadBlockSet)
1739 ParentL->getBlocksSet().erase(BB);
1741 [&](
BasicBlock *BB) { return DeadBlockSet.count(BB); });
1747 if (!DeadBlockSet.count(ChildL->getHeader()))
1750 assert(llvm::all_of(ChildL->blocks(),
1751 [&](BasicBlock *ChildBB) {
1752 return DeadBlockSet.count(ChildBB);
1754 "If the child loop header is dead all blocks in the child loop must "
1755 "be dead as well!");
1766 for (
auto *BB : DeadBlockSet) {
1768 assert(!DT.getNode(BB) &&
"Should already have cleared domtree!");
1769 LI.changeLoopFor(BB,
nullptr);
1775 BB->dropAllReferences();
1780 for (
auto *BB : DeadBlockSet)
1781 BB->eraseFromParent();
1799 auto *PH = L.getLoopPreheader();
1800 auto *Header = L.getHeader();
1814 assert(L.contains(Pred) &&
"Found a predecessor of the loop header other "
1815 "than the preheader that is not part of the "
1821 if (LoopBlockSet.
insert(Pred).second && Pred != Header)
1826 if (LoopBlockSet.
empty())
1827 return LoopBlockSet;
1830 while (!Worklist.
empty()) {
1832 assert(LoopBlockSet.
count(BB) &&
"Didn't put block into the loop set!");
1844 assert(L.contains(InnerL) &&
1845 "Should not reach a loop *outside* this loop!");
1848 auto *InnerPH = InnerL->getLoopPreheader();
1849 assert(L.contains(InnerPH) &&
"Cannot contain an inner loop block "
1850 "but not contain the inner loop "
1852 if (!LoopBlockSet.
insert(InnerPH).second)
1862 for (
auto *InnerBB : InnerL->blocks()) {
1863 if (InnerBB == BB) {
1865 "Block should already be in the set!");
1869 LoopBlockSet.
insert(InnerBB);
1881 if (L.contains(Pred) && LoopBlockSet.
insert(Pred).second)
1885 assert(LoopBlockSet.
count(Header) &&
"Cannot fail to add the header!");
1889 return LoopBlockSet;
1910 auto *PH = L.getLoopPreheader();
1914 Loop *ParentL =
nullptr;
1918 for (
auto *ExitBB : ExitBlocks)
1922 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1934 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1936 for (
Loop *IL = L.getParentLoop(); IL != ParentL;
1938 IL->getBlocksSet().erase(PH);
1939 for (
auto *BB : L.blocks())
1940 IL->getBlocksSet().erase(BB);
1942 return BB == PH || L.contains(BB);
1947 L.getParentLoop()->removeChildLoop(&L);
1955 auto &
Blocks = L.getBlocksVector();
1957 LoopBlockSet.empty()
1959 : std::stable_partition(
1961 [&](
BasicBlock *BB) { return LoopBlockSet.count(BB); });
1965 if (LoopBlockSet.empty())
1966 UnloopedBlocks.
insert(PH);
1970 L.getBlocksSet().erase(BB);
1981 Loop *PrevExitL = L.getParentLoop();
1983 auto RemoveUnloopedBlocksFromLoop =
1985 for (
auto *BB : UnloopedBlocks)
1986 L.getBlocksSet().erase(BB);
1988 return UnloopedBlocks.count(BB);
1993 while (!UnloopedBlocks.
empty() && !ExitsInLoops.
empty()) {
1994 assert(Worklist.
empty() &&
"Didn't clear worklist!");
1995 assert(NewExitLoopBlocks.empty() &&
"Didn't clear loop set!");
2000 assert(ExitL.
contains(&L) &&
"Exit loop must contain the inner loop!");
2006 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->
getParentLoop())
2007 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2021 if (!UnloopedBlocks.
erase(PredBB)) {
2022 assert((NewExitLoopBlocks.count(PredBB) ||
2024 "Predecessor not in a nested loop (or already visited)!");
2031 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2033 assert(Inserted &&
"Should only visit an unlooped block once!");
2038 }
while (!Worklist.
empty());
2043 for (
auto *BB : NewExitLoopBlocks)
2045 if (BBL == &L || !L.contains(BBL))
2050 NewExitLoopBlocks.clear();
2056 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2057 for (
auto *BB : UnloopedBlocks)
2059 if (BBL == &L || !L.contains(BBL))
2065 auto &SubLoops = L.getSubLoopsVector();
2066 auto SubLoopsSplitI =
2067 LoopBlockSet.empty()
2069 : std::stable_partition(
2070 SubLoops.begin(), SubLoops.end(), [&](
Loop *SubL) {
2071 return LoopBlockSet.count(SubL->getHeader());
2073 for (
auto *HoistedL :
make_range(SubLoopsSplitI, SubLoops.end())) {
2075 HoistedL->setParentLoop(
nullptr);
2085 if (
auto *NewParentL = LI.
getLoopFor(HoistedL->getLoopPreheader()))
2086 NewParentL->addChildLoop(HoistedL);
2090 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2094 assert(SubLoops.empty() &&
2095 "Failed to remove all subloops from the original loop!");
2096 if (
Loop *ParentL = L.getParentLoop())
2114template <
typename CallableT>
2126 if (!Callable(
N->getBlock()))
2132 "Cannot visit a node twice when walking a tree!");
2135 }
while (!DomWorklist.
empty());
2139 bool CurrentLoopValid,
bool PartiallyInvariant,
2142 if (!NewLoops.
empty())
2143 U.addSiblingLoops(NewLoops);
2147 if (CurrentLoopValid) {
2148 if (PartiallyInvariant) {
2151 auto &Context = L.getHeader()->getContext();
2154 MDString::get(Context,
"llvm.loop.unswitch.partial.disable"));
2156 Context, L.getLoopID(), {
"llvm.loop.unswitch.partial"},
2157 {DisableUnswitchMD});
2158 L.setLoopID(NewLoopID);
2159 }
else if (InjectedCondition) {
2161 auto &Context = L.getHeader()->getContext();
2164 MDString::get(Context,
"llvm.loop.unswitch.injection.disable"));
2166 Context, L.getLoopID(), {
"llvm.loop.unswitch.injection"},
2167 {DisableUnswitchMD});
2168 L.setLoopID(NewLoopID);
2170 U.revisitCurrentLoop();
2172 U.markLoopAsDeleted(L, LoopName);
2179 LPMUpdater &LoopUpdater,
bool InsertFreeze,
bool InjectedCondition) {
2182 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2186 std::string LoopName(L.getName());
2192 "Can only unswitch switches and conditional branch!");
2196 !PartiallyInvariant);
2199 "Cannot have other invariants with full unswitching!");
2202 "Partial unswitching requires an instruction as the condition!");
2215 if (!FullUnswitch) {
2219 PartiallyInvariant) &&
2220 "Only `or`, `and`, an `select`, partially invariant instructions "
2221 "can combine invariants being unswitched.");
2232 BI ? BI->
getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2237 for (
auto Case : SI->cases())
2238 if (Case.getCaseSuccessor() != RetainedSuccBB)
2239 UnswitchedSuccBBs.
insert(Case.getCaseSuccessor());
2241 assert(!UnswitchedSuccBBs.
count(RetainedSuccBB) &&
2242 "Should not unswitch the same successor we are retaining!");
2251 Loop *ParentL = L.getParentLoop();
2260 Loop *OuterExitL = &L;
2262 L.getUniqueExitBlocks(ExitBlocks);
2263 for (
auto *ExitBB : ExitBlocks) {
2267 if (!NewOuterExitL) {
2269 OuterExitL =
nullptr;
2272 if (NewOuterExitL != OuterExitL && NewOuterExitL->
contains(OuterExitL))
2273 OuterExitL = NewOuterExitL;
2293 for (
auto *SuccBB : llvm::concat<BasicBlock *const>(
ArrayRef(RetainedSuccBB),
2295 if (SuccBB->getUniquePredecessor() ||
2297 return PredBB == ParentBB || DT.
dominates(SuccBB, PredBB);
2300 DominatingSucc[BB] = SuccBB;
2319 for (
auto *SuccBB : UnswitchedSuccBBs) {
2322 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2323 DominatingSucc, *VMaps.
back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2328 if (TI.
getMetadata(LLVMContext::MD_make_implicit)) {
2332 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2339 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2346 SplitBB->getTerminator()->eraseFromParent();
2350 NewTI->
insertInto(ParentBB, ParentBB->end());
2370 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2372 assert(SI &&
"Must either be a branch or switch!");
2375 assert(SI->getDefaultDest() == RetainedSuccBB &&
2376 "Not retaining default successor!");
2377 SI->setDefaultDest(LoopPH);
2378 for (
const auto &Case : SI->cases())
2379 if (Case.getCaseSuccessor() == RetainedSuccBB)
2380 Case.setSuccessor(LoopPH);
2382 Case.setSuccessor(ClonedPHs.
find(Case.getCaseSuccessor())->second);
2385 SI->setCondition(
new FreezeInst(SI->getCondition(),
2386 SI->getCondition()->getName() +
".fr",
2387 SI->getIterator()));
2394 {DominatorTree::Insert, SplitBB, ClonedPHs.
find(SuccBB)->second});
2408 for (
auto &VMap : VMaps)
2424 "Only one possible unswitched block for a branch!");
2428 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2438 "Not retaining default successor!");
2439 for (
const auto &Case : NewSI->
cases())
2440 Case.getCaseSuccessor()->removePredecessor(
2448 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, SuccBB});
2459 assert(BI &&
"Only branches have partial unswitching.");
2461 "Only one possible unswitched block for a branch!");
2465 if (PartiallyInvariant)
2467 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH, L, MSSAU);
2470 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH,
2473 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2480 for (
auto &VMap : VMaps)
2500 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2523 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
2525 if (BI && !PartiallyInvariant) {
2531 "Only one possible unswitched block for a branch!");
2543 bool ReplaceUnswitched =
2544 FullUnswitch || (Invariants.
size() == 1) || PartiallyInvariant;
2552 for (
Value *Invariant : Invariants) {
2553 assert(!isa<Constant>(Invariant) &&
2554 "Should not be replacing constant values!");
2557 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2564 U.set(ContinueReplacement);
2565 else if (ReplaceUnswitched &&
2567 U.set(UnswitchedReplacement);
2584 auto UpdateLoop = [&](
Loop &UpdateL) {
2586 UpdateL.verifyLoop();
2587 for (
Loop *ChildL : UpdateL) {
2588 ChildL->verifyLoop();
2589 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2590 "Perturbed a child loop's LCSSA form!");
2610 for (
Loop *UpdatedL :
2611 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2612 UpdateLoop(*UpdatedL);
2613 if (UpdatedL->isOutermost())
2614 OuterExitL =
nullptr;
2618 if (L.isOutermost())
2619 OuterExitL =
nullptr;
2624 if (OuterExitL != &L)
2625 for (
Loop *OuterL = ParentL; OuterL != OuterExitL;
2627 UpdateLoop(*OuterL);
2639 for (
Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2640 if (UpdatedL->getParentLoop() == ParentL)
2642 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2643 InjectedCondition, SibLoops);
2666 auto BBCostIt = BBCostMap.
find(
N.getBlock());
2667 if (BBCostIt == BBCostMap.
end())
2671 auto DTCostIt = DTCostMap.
find(&
N);
2672 if (DTCostIt != DTCostMap.
end())
2673 return DTCostIt->second;
2678 N.begin(),
N.end(), BBCostIt->second,
2680 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2682 bool Inserted = DTCostMap.
insert({&
N,
Cost}).second;
2684 assert(Inserted &&
"Should not insert a node while visiting children!");
2719 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2721 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2722 *TailBB = CondBr->getSuccessor(1);
2727 PHINode::Create(SI->getType(), 2,
"unswitched.select", SI->getIterator());
2728 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2729 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2730 Phi->setDebugLoc(SI->getDebugLoc());
2731 SI->replaceAllUsesWith(Phi);
2732 SI->eraseFromParent();
2775 GI->
getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2782 GuardedBlock->
setName(
"guarded");
2833 return L.contains(SuccBB);
2835 NumCostMultiplierSkipped++;
2839 auto *ParentL = L.getParentLoop();
2840 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2841 : std::distance(LI.
begin(), LI.
end()));
2845 int UnswitchedClones = 0;
2846 for (
const auto &Candidate : UnswitchCandidates) {
2849 bool SkipExitingSuccessors = DT.
dominates(CondBlock, Latch);
2850 if (isa<SelectInst>(CI)) {
2855 if (!SkipExitingSuccessors)
2859 int NonExitingSuccessors =
2861 [SkipExitingSuccessors, &L](
const BasicBlock *SuccBB) {
2862 return !SkipExitingSuccessors || L.contains(SuccBB);
2864 UnswitchedClones +=
Log2_32(NonExitingSuccessors);
2872 unsigned ClonesPower =
2876 int SiblingsMultiplier =
2877 std::max((ParentL ? SiblingsCount
2887 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2891 <<
" (siblings " << SiblingsMultiplier <<
" * clones "
2892 << (1 << ClonesPower) <<
")"
2893 <<
" for unswitch candidate: " << TI <<
"\n");
2894 return CostMultiplier;
2902 assert(UnswitchCandidates.
empty() &&
"Should be!");
2906 if (isa<Constant>(
Cond))
2908 if (L.isLoopInvariant(
Cond)) {
2916 if (!Invariants.
empty())
2917 UnswitchCandidates.
push_back({
I, std::move(Invariants)});
2922 bool CollectGuards =
false;
2924 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2926 if (GuardDecl && !GuardDecl->use_empty())
2927 CollectGuards =
true;
2930 for (
auto *BB : L.blocks()) {
2934 for (
auto &
I : *BB) {
2935 if (
auto *SI = dyn_cast<SelectInst>(&
I)) {
2936 auto *
Cond = SI->getCondition();
2938 if (
Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2939 AddUnswitchCandidatesForInst(SI,
Cond);
2940 }
else if (CollectGuards &&
isGuard(&
I)) {
2944 if (!isa<Constant>(
Cond) && L.isLoopInvariant(
Cond))
2949 if (
auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2952 if (!isa<Constant>(SI->getCondition()) &&
2953 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2954 UnswitchCandidates.
push_back({SI, {SI->getCondition()}});
2958 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2959 if (!BI || !BI->isConditional() ||
2960 BI->getSuccessor(0) == BI->getSuccessor(1))
2963 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2967 !
any_of(UnswitchCandidates, [&L](
auto &TerminatorAndInvariants) {
2968 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2973 dbgs() <<
"simple-loop-unswitch: Found partially invariant condition "
2974 << *
Info->InstToDuplicate[0] <<
"\n");
2975 PartialIVInfo = *
Info;
2976 PartialIVCondBranch = L.getHeader()->getTerminator();
2980 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2983 return !UnswitchCandidates.
empty();
2996 if (!L.contains(IfTrue)) {
2997 Pred = ICmpInst::getInversePredicate(Pred);
3002 if (L.isLoopInvariant(
LHS)) {
3003 Pred = ICmpInst::getSwappedPredicate(Pred);
3009 Pred = ICmpInst::ICMP_ULT;
3010 RHS = ConstantInt::get(
3022 if (L.isLoopInvariant(
LHS) || !L.isLoopInvariant(
RHS))
3025 if (Pred != ICmpInst::ICMP_ULT)
3028 if (!L.contains(IfTrue) || L.contains(IfFalse))
3032 if (L.getHeader() == IfTrue)
3049 assert(Weights.
size() == 2 &&
"Unexpected profile data!");
3051 auto Num = Weights[
Idx];
3052 auto Denom = Weights[0] + Weights[1];
3054 if (Denom == 0 || Num > Denom)
3057 if (LikelyTaken > ActualTaken)
3080static NonTrivialUnswitchCandidate
3084 assert(Candidate.hasPendingInjection() &&
"Nothing to inject!");
3085 BasicBlock *Preheader = L.getLoopPreheader();
3086 assert(Preheader &&
"Loop is not in simplified form?");
3088 "Unswitching branch of inner loop!");
3090 auto Pred = Candidate.PendingInjection->Pred;
3091 auto *
LHS = Candidate.PendingInjection->LHS;
3092 auto *
RHS = Candidate.PendingInjection->RHS;
3093 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3094 auto *TI = cast<BranchInst>(Candidate.TI);
3095 auto *BB = Candidate.TI->getParent();
3096 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3097 : TI->getSuccessor(0);
3099 assert(L.contains(InLoopSucc) &&
"Not supported yet!");
3100 assert(!L.contains(OutOfLoopSucc) &&
"Not supported yet!");
3101 auto &Ctx = BB->getContext();
3104 assert(ICmpInst::isUnsigned(Pred) &&
"Not supported yet!");
3114 auto *InjectedCond =
3115 ICmpInst::Create(Instruction::ICmp, Pred,
LHS,
RHS,
"injected.cond",
3119 BB->getParent(), InLoopSucc);
3122 Builder.
CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3125 Builder.
CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3126 TI->getSuccessor(1));
3130 for (
auto &
I : *InLoopSucc) {
3131 auto *PN = dyn_cast<PHINode>(&
I);
3134 auto *Inc = PN->getIncomingValueForBlock(BB);
3135 PN->addIncoming(Inc, CheckBlock);
3137 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3140 { DominatorTree::Insert, BB, CheckBlock },
3141 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3142 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3143 { DominatorTree::Delete, BB, OutOfLoopSucc }
3149 L.addBasicBlockToLoop(CheckBlock, LI);
3161 LLVM_DEBUG(
dbgs() <<
"Injected a new loop-invariant branch " << *InvariantBr
3162 <<
" and considering it for unswitching.");
3163 ++NumInvariantConditionsInjected;
3164 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3185 assert(ICmpInst::isStrictPredicate(Pred));
3186 if (Compares.
size() < 2)
3189 for (
auto Prev = Compares.
begin(), Next = Compares.
begin() + 1;
3190 Next != Compares.
end(); ++Prev, ++Next) {
3194 InjectedInvariant ToInject(NonStrictPred,
LHS,
RHS, InLoopSucc);
3195 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3196 std::nullopt, std::move(ToInject));
3197 UnswitchCandidates.
push_back(std::move(Candidate));
3227 auto *Latch = L.getLoopLatch();
3231 assert(L.getLoopPreheader() &&
"Must have a preheader!");
3236 for (
auto *DTN = DT.
getNode(Latch); L.contains(DTN->getBlock());
3237 DTN = DTN->getIDom()) {
3240 BasicBlock *IfTrue =
nullptr, *IfFalse =
nullptr;
3241 auto *BB = DTN->getBlock();
3245 auto *Term = BB->getTerminator();
3259 CompareDesc
Desc(cast<BranchInst>(Term),
RHS, IfTrue);
3260 while (
auto *Zext = dyn_cast<ZExtInst>(
LHS))
3261 LHS = Zext->getOperand(0);
3262 CandidatesULT[
LHS].push_back(
Desc);
3266 for (
auto &It : CandidatesULT)
3268 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3273 if (!L.isSafeToClone())
3275 for (
auto *BB : L.blocks())
3276 for (
auto &
I : *BB) {
3277 if (
I.getType()->isTokenTy() &&
I.isUsedOutsideOfBlock(BB))
3279 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
3280 assert(!CB->cannotDuplicate() &&
"Checked by L.isSafeToClone().");
3281 if (CB->isConvergent())
3294 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3298 L.getUniqueExitBlocks(ExitBlocks);
3303 for (
auto *ExitBB : ExitBlocks) {
3304 auto *
I = ExitBB->getFirstNonPHI();
3305 if (isa<CleanupPadInst>(
I) || isa<CatchSwitchInst>(
I)) {
3306 LLVM_DEBUG(
dbgs() <<
"Cannot unswitch because of cleanuppad/catchswitch "
3334 L.getHeader()->getParent()->hasMinSize()
3338 for (
auto *BB : L.blocks()) {
3340 for (
auto &
I : *BB) {
3345 assert(
Cost >= 0 &&
"Must not have negative costs!");
3347 assert(LoopCost >= 0 &&
"Must not have negative loop costs!");
3348 BBCostMap[BB] =
Cost;
3372 if (isa<SelectInst>(TI))
3381 if (!Visited.
insert(SuccBB).second)
3389 if (!FullUnswitch) {
3390 auto &BI = cast<BranchInst>(TI);
3393 if (SuccBB == BI.getSuccessor(1))
3396 if (SuccBB == BI.getSuccessor(0))
3399 SuccBB == BI.getSuccessor(0)) ||
3401 SuccBB == BI.getSuccessor(1)))
3409 if (SuccBB->getUniquePredecessor() ||
3411 return PredBB == &BB || DT.
dominates(SuccBB, PredBB);
3415 "Non-duplicated cost should never exceed total loop cost!");
3424 int SuccessorsCount =
isGuard(&TI) ? 2 : Visited.
size();
3425 assert(SuccessorsCount > 1 &&
3426 "Cannot unswitch a condition without multiple distinct successors!");
3427 return (LoopCost -
Cost) * (SuccessorsCount - 1);
3430 std::optional<NonTrivialUnswitchCandidate> Best;
3431 for (
auto &Candidate : UnswitchCandidates) {
3436 !BI || Candidate.hasPendingInjection() ||
3437 (Invariants.
size() == 1 &&
3439 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3443 int CostMultiplier =
3447 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3448 CandidateCost *= CostMultiplier;
3450 <<
" (multiplier: " << CostMultiplier <<
")"
3451 <<
" for unswitch candidate: " << TI <<
"\n");
3454 <<
" for unswitch candidate: " << TI <<
"\n");
3457 if (!Best || CandidateCost < Best->
Cost) {
3459 Best->Cost = CandidateCost;
3462 assert(Best &&
"Must be!");
3474 assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
3484 if (
BranchInst *BI = dyn_cast<BranchInst>(&TI))
3489 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3503 PartialIVCondBranch, L, LI, AA, MSSAU);
3506 PartialIVCondBranch, L, DT, LI, AA,
3509 if (UnswitchCandidates.
empty())
3513 dbgs() <<
"Considering " << UnswitchCandidates.
size()
3514 <<
" non-trivial loop invariant conditions for unswitching.\n");
3517 UnswitchCandidates, L, DT, LI, AC,
TTI, PartialIVInfo);
3519 assert(Best.TI &&
"Failed to find loop unswitch candidate");
3520 assert(Best.Cost &&
"Failed to compute cost");
3523 LLVM_DEBUG(
dbgs() <<
"Cannot unswitch, lowest cost found: " << *Best.Cost
3528 bool InjectedCondition =
false;
3529 if (Best.hasPendingInjection()) {
3531 InjectedCondition =
true;
3533 assert(!Best.hasPendingInjection() &&
3534 "All injections should have been done by now!");
3536 if (Best.TI != PartialIVCondBranch)
3540 if (
auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3546 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3556 LLVM_DEBUG(
dbgs() <<
" Unswitching non-trivial (cost = " << Best.Cost
3557 <<
") terminator: " << *Best.TI <<
"\n");
3559 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3591 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3592 "Loops must be in LCSSA form before unswitching.");
3595 if (!L.isLoopSimplifyForm())
3608 const Function *
F = L.getHeader()->getParent();
3621 bool ContinueWithNonTrivial =
3623 if (!ContinueWithNonTrivial)
3627 if (
F->hasOptSize())
3632 auto IsLoopNestCold = [&](
const Loop *L) {
3638 Parent = Parent->getParentLoop();
3642 Worklist.
insert(Worklist.
end(), L->getSubLoops().begin(),
3643 L->getSubLoops().end());
3644 while (!Worklist.
empty()) {
3648 Worklist.
insert(Worklist.
end(), CurLoop->getSubLoops().begin(),
3649 CurLoop->getSubLoops().end());
3683 Function &
F = *L.getHeader()->getParent();
3686 if (
auto OuterProxy =
3690 LLVM_DEBUG(
dbgs() <<
"Unswitching loop in " <<
F.getName() <<
": " << L
3693 std::optional<MemorySSAUpdater> MSSAU;
3700 &AR.
SE, MSSAU ? &*MSSAU :
nullptr, PSI, AR.
BFI, U))
3719 OS, MapClassName2PassName);
3722 OS << (NonTrivial ?
"" :
"no-") <<
"nontrivial;";
3723 OS << (Trivial ?
"" :
"no-") <<
"trivial";
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static void canonicalizeForInvariantConditionInjection(ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
bool isOneValue() const
Returns true if the value is one.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void dropLocation()
Drop the instruction's debug location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
StringRef getName() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDString * get(LLVMContext &Context, StringRef Str)
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
An analysis that produces MemorySSA for a function.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block.
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
A Module instance is used to store all the information related to an LLVM module.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
unsigned getIntegerBitWidth() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void setName(const Twine &Name)
Change the name of the value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
auto reverse(ContainerTy &&C)
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
void sort(IteratorTy Start, IteratorTy End)
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool VerifyLoopInfo
Enable verification of loop info.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
bool VerifyMemorySSA
Enables verification of MemorySSA.
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A special type used by analysis passes to provide an address that identifies that particular analysis...
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Direction
An enum for the direction of the loop.
A CRTP mix-in to automatically provide informational APIs needed for passes.