52#define DEBUG_TYPE "loop-interchange"
54STATISTIC(LoopsInterchanged,
"Number of loops interchanged");
58 cl::desc(
"Interchange if you gain more than this number"));
65using CharMatrix = std::vector<std::vector<char>>;
77 for (
auto &Row : DepMatrix) {
96 if (!isa<Instruction>(
I))
98 if (
auto *Ld = dyn_cast<LoadInst>(&
I)) {
102 }
else if (
auto *St = dyn_cast<StoreInst>(&
I)) {
105 MemInstr.push_back(&
I);
111 <<
" Loads and Stores to analyze\n");
113 ValueVector::iterator
I, IE, J, JE;
116 for (
I = MemInstr.begin(), IE = MemInstr.end();
I != IE; ++
I) {
117 for (J =
I, JE = MemInstr.end(); J != JE; ++J) {
118 std::vector<char> Dep;
122 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
125 if (
auto D = DI->
depends(Src, Dst,
true)) {
126 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
129 if (
D->normalize(SE))
132 D->isFlow() ?
"flow" :
D->isAnti() ?
"anti" :
"output";
133 dbgs() <<
"Found " << DepType
134 <<
" dependency between Src and Dst\n"
135 <<
" Src:" << *Src <<
"\n Dst:" << *Dst <<
'\n');
136 unsigned Levels =
D->getLevels();
138 for (
unsigned II = 1;
II <= Levels; ++
II) {
139 if (
D->isScalar(
II)) {
143 unsigned Dir =
D->getDirection(
II);
157 while (Dep.size() != Level) {
163 DepMatrix.push_back(Dep);
167 <<
" dependencies inside loop\n");
181 for (
unsigned I = 0, E = DepMatrix.size();
I < E; ++
I)
182 std::swap(DepMatrix[
I][ToIndx], DepMatrix[
I][FromIndx]);
201 unsigned InnerLoopId,
202 unsigned OuterLoopId) {
203 unsigned NumRows = DepMatrix.size();
204 std::vector<char> Cur;
206 for (
unsigned Row = 0; Row < NumRows; ++Row) {
209 Cur = DepMatrix[Row];
212 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
221 << L.getHeader()->getParent()->getName() <<
" Loop: %"
222 << L.getHeader()->getName() <<
'\n');
223 assert(LoopList.empty() &&
"LoopList should initially be empty!");
224 Loop *CurrentLoop = &L;
225 const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
226 while (!Vec->empty()) {
230 if (Vec->size() != 1) {
235 LoopList.push_back(CurrentLoop);
236 CurrentLoop = Vec->front();
239 LoopList.push_back(CurrentLoop);
243 unsigned LoopNestDepth = LoopList.
size();
244 if (LoopNestDepth < 2) {
245 LLVM_DEBUG(
dbgs() <<
"Loop doesn't contain minimum nesting level.\n");
253class LoopInterchangeLegality {
257 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
260 bool canInterchangeLoops(
unsigned InnerLoopId,
unsigned OuterLoopId,
261 CharMatrix &DepMatrix);
269 bool isLoopStructureUnderstood();
271 bool currentLimitations();
274 return OuterInnerReductions;
278 return InnerLoopInductions;
282 bool tightlyNested(
Loop *Outer,
Loop *Inner);
283 bool containsUnsafeInstructions(
BasicBlock *BB);
289 bool findInductionAndReductions(
Loop *L,
311class LoopInterchangeProfitability {
315 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
319 unsigned InnerLoopId,
unsigned OuterLoopId,
320 CharMatrix &DepMatrix,
322 std::unique_ptr<CacheCost> &
CC);
325 int getInstrOrderCost();
326 std::optional<bool> isProfitablePerLoopCacheAnalysis(
328 std::unique_ptr<CacheCost> &
CC);
329 std::optional<bool> isProfitablePerInstrOrderCost();
330 std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
331 unsigned OuterLoopId,
332 CharMatrix &DepMatrix);
344class LoopInterchangeTransform {
348 const LoopInterchangeLegality &LIL)
349 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
353 void restructureLoops(
Loop *NewInner,
Loop *NewOuter,
356 void removeChildLoop(
Loop *OuterLoop,
Loop *InnerLoop);
359 bool adjustLoopLinks();
360 bool adjustLoopBranches();
371 const LoopInterchangeLegality &LIL;
374struct LoopInterchange {
379 std::unique_ptr<CacheCost>
CC =
nullptr;
387 : SE(SE), LI(LI), DI(DI), DT(DT),
CC(
std::
move(
CC)), ORE(ORE) {}
390 if (
L->getParentLoop())
394 return processLoopList(LoopList);
399 for (
unsigned I = 1;
I < LoopList.size(); ++
I)
400 if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
402 return processLoopList(LoopList);
406 for (
Loop *L : LoopList) {
408 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
412 if (
L->getNumBackEdges() != 1) {
416 if (!
L->getExitingBlock()) {
427 return LoopList.
size() - 1;
431 bool Changed =
false;
436 unsigned LoopNestDepth = LoopList.
size();
442 if (!isComputableLoopNest(LoopList)) {
443 LLVM_DEBUG(
dbgs() <<
"Not valid loop candidate for interchange\n");
447 LLVM_DEBUG(
dbgs() <<
"Processing LoopList of size = " << LoopNestDepth
450 CharMatrix DependencyMatrix;
451 Loop *OuterMostLoop = *(LoopList.
begin());
453 OuterMostLoop, DI, SE)) {
468 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
479 const auto &LoopCosts =
CC->getLoopCosts();
480 for (
unsigned i = 0; i < LoopCosts.size(); i++) {
481 CostMap[LoopCosts[i].first] = i;
488 for (
unsigned j = SelecLoopId;
j > 0;
j--) {
489 bool ChangedPerIter =
false;
490 for (
unsigned i = SelecLoopId; i > SelecLoopId -
j; i--) {
491 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
492 DependencyMatrix, CostMap);
503 ChangedPerIter |= Interchanged;
504 Changed |= Interchanged;
514 bool processLoop(
Loop *InnerLoop,
Loop *OuterLoop,
unsigned InnerLoopId,
515 unsigned OuterLoopId,
516 std::vector<std::vector<char>> &DependencyMatrix,
519 <<
" and OuterLoopId = " << OuterLoopId <<
"\n");
520 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
521 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
522 LLVM_DEBUG(
dbgs() <<
"Not interchanging loops. Cannot prove legality.\n");
526 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
527 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
528 DependencyMatrix, CostMap, CC)) {
537 <<
"Loop interchanged with enclosing loop.";
540 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
552bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB) {
554 return I.mayHaveSideEffects() ||
I.mayReadFromMemory();
558bool LoopInterchangeLegality::tightlyNested(
Loop *OuterLoop,
Loop *InnerLoop) {
570 if (!OuterLoopHeaderBI)
574 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader() &&
575 Succ != OuterLoopLatch)
578 LLVM_DEBUG(
dbgs() <<
"Checking instructions in Loop header and Loop latch\n");
581 if (containsUnsafeInstructions(OuterLoopHeader) ||
582 containsUnsafeInstructions(OuterLoopLatch))
588 if (InnerLoopPreHeader != OuterLoopHeader &&
589 containsUnsafeInstructions(InnerLoopPreHeader))
597 if (&SuccInner != OuterLoopLatch) {
599 <<
" does not lead to the outer loop latch.\n";);
605 if (containsUnsafeInstructions(InnerLoopExit))
613bool LoopInterchangeLegality::isLoopStructureUnderstood() {
615 for (
PHINode *InnerInduction : InnerLoopInductions) {
616 unsigned Num = InnerInduction->getNumOperands();
617 for (
unsigned i = 0; i < Num; ++i) {
618 Value *Val = InnerInduction->getOperand(i);
619 if (isa<Constant>(Val))
628 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
629 InnerLoopPreheader &&
649 Value *Op0 = InnerLoopCmp->getOperand(0);
650 Value *Op1 = InnerLoopCmp->getOperand(1);
661 std::function<
bool(
Value *)> IsPathToInnerIndVar;
662 IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) ->
bool {
665 if (isa<Constant>(V))
670 if (isa<CastInst>(
I))
671 return IsPathToInnerIndVar(
I->getOperand(0));
672 if (isa<BinaryOperator>(
I))
673 return IsPathToInnerIndVar(
I->getOperand(0)) &&
674 IsPathToInnerIndVar(
I->getOperand(1));
680 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
685 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
688 }
else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
711 if (
PHI->getNumIncomingValues() != 1)
719 if (isa<Constant>(V))
724 if (
PHI->getNumIncomingValues() == 1)
740bool LoopInterchangeLegality::findInductionAndReductions(
742 if (!
L->getLoopLatch() || !
L->getLoopPredecessor())
752 if (!OuterInnerReductions.count(&
PHI)) {
754 "across the outer loop.\n");
758 assert(
PHI.getNumIncomingValues() == 2 &&
759 "Phis in loop header should have exactly 2 incoming values");
768 <<
"Failed to recognize PHI as an induction or reduction.\n");
771 OuterInnerReductions.insert(&
PHI);
772 OuterInnerReductions.insert(InnerRedPhi);
781bool LoopInterchangeLegality::currentLimitations() {
791 dbgs() <<
"Loops where the latch is not the exiting block are not"
792 <<
" supported currently.\n");
797 <<
"Loops where the latch is not the exiting block cannot be"
798 " interchange currently.";
804 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
806 dbgs() <<
"Only outer loops with induction or reduction PHI nodes "
807 <<
"are supported currently.\n");
812 <<
"Only outer loops with induction or reduction PHI nodes can be"
813 " interchanged currently.";
822 Loop *CurLevelLoop = OuterLoop;
825 CurLevelLoop = CurLevelLoop->
getSubLoops().front();
826 if (!findInductionAndReductions(CurLevelLoop, Inductions,
nullptr)) {
828 dbgs() <<
"Only inner loops with induction or reduction PHI nodes "
829 <<
"are supported currently.\n");
834 <<
"Only inner loops with induction or reduction PHI nodes can be"
835 " interchange currently.";
842 if (!isLoopStructureUnderstood()) {
848 <<
"Inner loop structure not understood currently.";
856bool LoopInterchangeLegality::findInductions(
863 return !Inductions.
empty();
875 if (
PHI.getNumIncomingValues() > 1)
878 PHINode *PN = dyn_cast<PHINode>(U);
880 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
898 for (
unsigned i = 0; i <
PHI.getNumIncomingValues(); i++) {
899 Instruction *IncomingI = dyn_cast<Instruction>(
PHI.getIncomingValue(i));
945 for (
auto *U :
PHI.users()) {
954bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
955 unsigned OuterLoopId,
956 CharMatrix &DepMatrix) {
958 LLVM_DEBUG(
dbgs() <<
"Failed interchange InnerLoopId = " << InnerLoopId
959 <<
" and OuterLoopId = " << OuterLoopId
960 <<
" due to dependence\n");
965 <<
"Cannot interchange loops due to dependences.";
970 for (
auto *BB : OuterLoop->
blocks())
972 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
974 if (CI->onlyWritesMemory())
977 dbgs() <<
"Loops with call instructions cannot be interchanged "
983 <<
"Cannot interchange loops due to call instruction.";
989 if (!findInductions(InnerLoop, InnerLoopInductions)) {
990 LLVM_DEBUG(
dbgs() <<
"Could not find inner loop induction variables.\n");
995 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop latch.\n");
1000 <<
"Cannot interchange loops because unsupported PHI nodes found "
1001 "in inner loop latch.";
1008 if (currentLimitations()) {
1009 LLVM_DEBUG(
dbgs() <<
"Not legal because of current transform limitation\n");
1014 if (!tightlyNested(OuterLoop, InnerLoop)) {
1020 <<
"Cannot interchange loops because they are not tightly "
1027 OuterInnerReductions)) {
1028 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop exit.\n");
1033 <<
"Found unsupported PHI node in loop exit.";
1039 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in outer loop exit.\n");
1044 <<
"Found unsupported PHI node in loop exit.";
1052int LoopInterchangeProfitability::getInstrOrderCost() {
1053 unsigned GoodOrder, BadOrder;
1054 BadOrder = GoodOrder = 0;
1058 unsigned NumOp =
GEP->getNumOperands();
1059 bool FoundInnerInduction =
false;
1060 bool FoundOuterInduction =
false;
1061 for (
unsigned i = 0; i < NumOp; ++i) {
1076 if (AR->
getLoop() == InnerLoop) {
1079 FoundInnerInduction =
true;
1080 if (FoundOuterInduction) {
1090 if (AR->
getLoop() == OuterLoop) {
1093 FoundOuterInduction =
true;
1094 if (FoundInnerInduction) {
1103 return GoodOrder - BadOrder;
1107LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1109 std::unique_ptr<CacheCost> &
CC) {
1114 unsigned InnerIndex = 0, OuterIndex = 0;
1115 InnerIndex = CostMap.
find(InnerLoop)->second;
1116 OuterIndex = CostMap.
find(OuterLoop)->second;
1118 <<
", OuterIndex = " << OuterIndex <<
"\n");
1119 if (InnerIndex < OuterIndex)
1120 return std::optional<bool>(
true);
1121 assert(InnerIndex != OuterIndex &&
"CostMap should assign unique "
1122 "numbers to each loop");
1123 if (
CC->getLoopCost(*OuterLoop) ==
CC->getLoopCost(*InnerLoop))
1124 return std::nullopt;
1125 return std::optional<bool>(
false);
1127 return std::nullopt;
1131LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1135 int Cost = getInstrOrderCost();
1138 return std::optional<bool>(
true);
1140 return std::nullopt;
1143std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1144 unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix) {
1145 for (
auto &Row : DepMatrix) {
1149 if (Row[InnerLoopId] ==
'I' || Row[InnerLoopId] ==
'=')
1150 return std::optional<bool>(
false);
1155 if (Row[OuterLoopId] !=
'I' && Row[OuterLoopId] !=
'=')
1156 return std::optional<bool>(
false);
1161 return std::optional<bool>(!DepMatrix.empty());
1164bool LoopInterchangeProfitability::isProfitable(
1165 const Loop *InnerLoop,
const Loop *OuterLoop,
unsigned InnerLoopId,
1166 unsigned OuterLoopId, CharMatrix &DepMatrix,
1168 std::unique_ptr<CacheCost> &
CC) {
1177 std::optional<bool> shouldInterchange =
1178 isProfitablePerLoopCacheAnalysis(CostMap,
CC);
1179 if (!shouldInterchange.has_value()) {
1180 shouldInterchange = isProfitablePerInstrOrderCost();
1181 if (!shouldInterchange.has_value())
1183 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1185 if (!shouldInterchange.has_value()) {
1190 <<
"Insufficient information to calculate the cost of loop for "
1194 }
else if (!shouldInterchange.value()) {
1199 <<
"Interchanging loops is not considered to improve cache "
1200 "locality nor vectorization.";
1207void LoopInterchangeTransform::removeChildLoop(
Loop *OuterLoop,
1209 for (
Loop *L : *OuterLoop)
1210 if (L == InnerLoop) {
1211 OuterLoop->removeChildLoop(L);
1240void LoopInterchangeTransform::restructureLoops(
1250 if (OuterLoopParent) {
1252 removeChildLoop(OuterLoopParent, NewInner);
1253 removeChildLoop(NewInner, NewOuter);
1256 removeChildLoop(NewInner, NewOuter);
1281 if (BB == OuterHeader || BB == OuterLatch)
1296bool LoopInterchangeTransform::transform() {
1297 bool Transformed =
false;
1302 auto &InductionPHIs = LIL.getInnerLoopInductions();
1303 if (InductionPHIs.empty()) {
1304 LLVM_DEBUG(
dbgs() <<
"Failed to find the point to split loop latch \n");
1309 for (
PHINode *CurInductionPHI : InductionPHIs) {
1310 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1312 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1315 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1328 auto MoveInstructions = [&i, &WorkList,
this, &InductionPHIs, NewLatch]() {
1329 for (; i < WorkList.
size(); i++) {
1335 "Moving instructions with side-effects may change behavior of "
1346 for (
Value *
Op : WorkList[i]->operands()) {
1364 for (
Instruction *InnerIndexVar : InnerIndexVarList)
1365 WorkList.
insert(cast<Instruction>(InnerIndexVar));
1382 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1383 if (InnerLoopPreHeader != OuterLoopHeader) {
1387 std::prev(InnerLoopPreHeader->
end()))))
1391 Transformed |= adjustLoopLinks();
1416 I->removeFromParent();
1431 std::vector<DominatorTree::UpdateType> &DTUpdates,
1432 bool MustUpdateOnce =
true) {
1433 assert((!MustUpdateOnce ||
1437 }) == 1) &&
"BI must jump to OldBB exactly once.");
1438 bool Changed =
false;
1446 DTUpdates.push_back(
1447 {DominatorTree::UpdateKind::Insert, BI->
getParent(), NewBB});
1448 DTUpdates.push_back(
1449 {DominatorTree::UpdateKind::Delete, BI->
getParent(), OldBB});
1451 assert(Changed &&
"Expected a successor to be updated");
1468 assert(
P.getNumIncomingValues() == 1 &&
1469 "Only loops with a single exit are supported!");
1472 auto IncI = cast<Instruction>(
P.getIncomingValueForBlock(InnerLatch));
1475 auto IncIInnerMost = cast<Instruction>(
followLCSSA(IncI));
1478 if (IncIInnerMost->getParent() != InnerLatch &&
1479 IncIInnerMost->
getParent() != InnerHeader)
1483 [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
1484 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1485 IncI->getParent() == InnerHeader) ||
1486 cast<PHINode>(U)->getParent() == OuterExit;
1488 "Can only replace phis iff the uses are in the loop nest exit or "
1489 "the incoming value is defined in the inner header (it will "
1490 "dominate all loop blocks after interchanging)");
1491 P.replaceAllUsesWith(IncI);
1492 P.eraseFromParent();
1522 if (
P.getNumIncomingValues() != 1)
1526 auto I = dyn_cast<Instruction>(
P.getIncomingValue(0));
1530 PHINode *NewPhi = dyn_cast<PHINode>(
P.clone());
1536 if (Pred == OuterLatch)
1541 P.setIncomingValue(0, NewPhi);
1551bool LoopInterchangeTransform::adjustLoopBranches() {
1553 std::vector<DominatorTree::UpdateType> DTUpdates;
1555 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1558 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1559 InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
1560 InnerLoopPreHeader &&
"Guaranteed by loop-simplify form");
1565 if (isa<PHINode>(OuterLoopPreHeader->
begin()) ||
1567 OuterLoopPreHeader =
1569 if (InnerLoopPreHeader == OuterLoop->getHeader())
1570 InnerLoopPreHeader =
1575 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1577 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1593 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1594 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1599 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->
getTerminator());
1601 dyn_cast<BranchInst>(OuterLoopPredecessor->
getTerminator());
1603 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1606 if (!InnerLoopHeaderSuccessor)
1614 InnerLoopPreHeader, DTUpdates,
false);
1624 InnerLoopHeaderSuccessor, DTUpdates,
1632 OuterLoopPreHeader, DTUpdates);
1635 if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
1636 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
1638 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
1641 InnerLoopLatchSuccessor, DTUpdates);
1643 if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
1644 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
1646 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
1649 OuterLoopLatchSuccessor, DTUpdates);
1650 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1654 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1655 OuterLoopPreHeader);
1657 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1658 OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
1664 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1668 if (OuterInnerReductions.contains(&
PHI))
1672 if (OuterInnerReductions.contains(&
PHI))
1681 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
1686 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
1708bool LoopInterchangeTransform::adjustLoopLinks() {
1710 bool Changed = adjustLoopBranches();
1715 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1733 std::unique_ptr<CacheCost>
CC =
1736 if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT,
CC, &ORE).run(LN))
1738 U.markLoopNestChanged(
true);
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
This file defines the interface for the loop cache analysis.
static bool hasMinimumLoopDepth(SmallVectorImpl< Loop * > &LoopList)
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static const unsigned MaxLoopNestDepth
static void printDepMatrix(CharMatrix &DepMatrix)
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
static const unsigned MaxMemInstrCount
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static bool isLexicographicallyPositive(std::vector< char > &DV)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
static bool isProfitable(const SmallVector< std::unique_ptr< StableFunctionMap::StableFunctionEntry > > &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
StringSet - A set-like wrapper for the StringMap.
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
DependenceInfo - This class is the main dependence-analysis driver.
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
This node represents a polynomial recurrence on the trip count of the specified loop.
const Loop * getLoop() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
StringSet - A wrapper for StringMap that provides set-like functionality.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto successors(const MachineBasicBlock *BB)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto map_range(ContainerTy &&C, FuncTy F)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.