52#define DEBUG_TYPE "loop-interchange"
54STATISTIC(LoopsInterchanged,
"Number of loops interchanged");
58 cl::desc(
"Interchange if you gain more than this number"));
65using CharMatrix = std::vector<std::vector<char>>;
75#ifdef DUMP_DEP_MATRICIES
76static void printDepMatrix(CharMatrix &DepMatrix) {
77 for (
auto &Row : DepMatrix) {
96 if (!isa<Instruction>(
I))
98 if (
auto *Ld = dyn_cast<LoadInst>(&
I)) {
102 }
else if (
auto *St = dyn_cast<StoreInst>(&
I)) {
105 MemInstr.push_back(&
I);
111 <<
" Loads and Stores to analyze\n");
113 ValueVector::iterator
I, IE, J, JE;
115 for (
I = MemInstr.begin(), IE = MemInstr.end();
I != IE; ++
I) {
116 for (J =
I, JE = MemInstr.end(); J != JE; ++J) {
117 std::vector<char> Dep;
121 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
124 if (
auto D = DI->
depends(Src, Dst,
true)) {
125 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
128 if (
D->normalize(SE))
131 D->isFlow() ?
"flow" :
D->isAnti() ?
"anti" :
"output";
132 dbgs() <<
"Found " << DepType
133 <<
" dependency between Src and Dst\n"
134 <<
" Src:" << *Src <<
"\n Dst:" << *Dst <<
'\n');
135 unsigned Levels =
D->getLevels();
137 for (
unsigned II = 1;
II <= Levels; ++
II) {
138 if (
D->isScalar(
II)) {
142 unsigned Dir =
D->getDirection(
II);
156 while (Dep.size() != Level) {
160 DepMatrix.push_back(Dep);
163 <<
" dependencies inside loop\n");
177 for (
unsigned I = 0, E = DepMatrix.size();
I < E; ++
I)
178 std::swap(DepMatrix[
I][ToIndx], DepMatrix[
I][FromIndx]);
197 unsigned InnerLoopId,
198 unsigned OuterLoopId) {
199 unsigned NumRows = DepMatrix.size();
200 std::vector<char> Cur;
202 for (
unsigned Row = 0; Row < NumRows; ++Row) {
205 Cur = DepMatrix[Row];
208 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
217 << L.getHeader()->getParent()->getName() <<
" Loop: %"
218 << L.getHeader()->getName() <<
'\n');
219 assert(LoopList.empty() &&
"LoopList should initially be empty!");
220 Loop *CurrentLoop = &L;
221 const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
222 while (!Vec->empty()) {
226 if (Vec->size() != 1) {
231 LoopList.push_back(CurrentLoop);
232 CurrentLoop = Vec->front();
235 LoopList.push_back(CurrentLoop);
241class LoopInterchangeLegality {
245 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
248 bool canInterchangeLoops(
unsigned InnerLoopId,
unsigned OuterLoopId,
249 CharMatrix &DepMatrix);
257 bool isLoopStructureUnderstood();
259 bool currentLimitations();
262 return OuterInnerReductions;
266 return InnerLoopInductions;
270 bool tightlyNested(
Loop *Outer,
Loop *Inner);
271 bool containsUnsafeInstructions(
BasicBlock *BB);
277 bool findInductionAndReductions(
Loop *L,
299class LoopInterchangeProfitability {
303 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
306 bool isProfitable(
const Loop *InnerLoop,
const Loop *OuterLoop,
307 unsigned InnerLoopId,
unsigned OuterLoopId,
308 CharMatrix &DepMatrix,
310 std::unique_ptr<CacheCost> &
CC);
313 int getInstrOrderCost();
314 std::optional<bool> isProfitablePerLoopCacheAnalysis(
316 std::unique_ptr<CacheCost> &
CC);
317 std::optional<bool> isProfitablePerInstrOrderCost();
318 std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
319 unsigned OuterLoopId,
320 CharMatrix &DepMatrix);
332class LoopInterchangeTransform {
336 const LoopInterchangeLegality &LIL)
337 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
341 void restructureLoops(
Loop *NewInner,
Loop *NewOuter,
344 void removeChildLoop(
Loop *OuterLoop,
Loop *InnerLoop);
347 bool adjustLoopLinks();
348 bool adjustLoopBranches();
359 const LoopInterchangeLegality &LIL;
362struct LoopInterchange {
367 std::unique_ptr<CacheCost>
CC =
nullptr;
375 : SE(SE), LI(LI), DI(DI), DT(DT),
CC(
std::
move(
CC)), ORE(ORE) {}
378 if (
L->getParentLoop())
382 return processLoopList(LoopList);
387 for (
unsigned I = 1;
I < LoopList.size(); ++
I)
388 if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
390 return processLoopList(LoopList);
394 for (
Loop *L : LoopList) {
396 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
400 if (
L->getNumBackEdges() != 1) {
404 if (!
L->getExitingBlock()) {
415 return LoopList.
size() - 1;
419 bool Changed =
false;
420 unsigned LoopNestDepth = LoopList.
size();
421 if (LoopNestDepth < 2) {
422 LLVM_DEBUG(
dbgs() <<
"Loop doesn't contain minimum nesting level.\n");
430 if (!isComputableLoopNest(LoopList)) {
431 LLVM_DEBUG(
dbgs() <<
"Not valid loop candidate for interchange\n");
435 LLVM_DEBUG(
dbgs() <<
"Processing LoopList of size = " << LoopNestDepth
438 CharMatrix DependencyMatrix;
439 Loop *OuterMostLoop = *(LoopList.
begin());
441 OuterMostLoop, DI, SE)) {
445#ifdef DUMP_DEP_MATRICIES
447 printDepMatrix(DependencyMatrix);
457 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
468 const auto &LoopCosts =
CC->getLoopCosts();
469 for (
unsigned i = 0; i < LoopCosts.size(); i++) {
470 CostMap[LoopCosts[i].first] = i;
477 for (
unsigned j = SelecLoopId;
j > 0;
j--) {
478 bool ChangedPerIter =
false;
479 for (
unsigned i = SelecLoopId; i > SelecLoopId -
j; i--) {
480 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
481 DependencyMatrix, CostMap);
488#ifdef DUMP_DEP_MATRICIES
490 printDepMatrix(DependencyMatrix);
492 ChangedPerIter |= Interchanged;
493 Changed |= Interchanged;
503 bool processLoop(
Loop *InnerLoop,
Loop *OuterLoop,
unsigned InnerLoopId,
504 unsigned OuterLoopId,
505 std::vector<std::vector<char>> &DependencyMatrix,
508 <<
" and OuterLoopId = " << OuterLoopId <<
"\n");
509 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
510 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
511 LLVM_DEBUG(
dbgs() <<
"Not interchanging loops. Cannot prove legality.\n");
515 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
516 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
517 DependencyMatrix, CostMap, CC)) {
526 <<
"Loop interchanged with enclosing loop.";
529 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
541bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB) {
543 return I.mayHaveSideEffects() ||
I.mayReadFromMemory();
547bool LoopInterchangeLegality::tightlyNested(
Loop *OuterLoop,
Loop *InnerLoop) {
559 if (!OuterLoopHeaderBI)
563 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader() &&
564 Succ != OuterLoopLatch)
567 LLVM_DEBUG(
dbgs() <<
"Checking instructions in Loop header and Loop latch\n");
570 if (containsUnsafeInstructions(OuterLoopHeader) ||
571 containsUnsafeInstructions(OuterLoopLatch))
577 if (InnerLoopPreHeader != OuterLoopHeader &&
578 containsUnsafeInstructions(InnerLoopPreHeader))
586 if (&SuccInner != OuterLoopLatch) {
588 <<
" does not lead to the outer loop latch.\n";);
594 if (containsUnsafeInstructions(InnerLoopExit))
602bool LoopInterchangeLegality::isLoopStructureUnderstood() {
604 for (
PHINode *InnerInduction : InnerLoopInductions) {
605 unsigned Num = InnerInduction->getNumOperands();
606 for (
unsigned i = 0; i < Num; ++i) {
607 Value *Val = InnerInduction->getOperand(i);
608 if (isa<Constant>(Val))
617 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
618 InnerLoopPreheader &&
638 Value *Op0 = InnerLoopCmp->getOperand(0);
639 Value *Op1 = InnerLoopCmp->getOperand(1);
650 std::function<
bool(
Value *)> IsPathToInnerIndVar;
651 IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) ->
bool {
654 if (isa<Constant>(V))
659 if (isa<CastInst>(
I))
660 return IsPathToInnerIndVar(
I->getOperand(0));
661 if (isa<BinaryOperator>(
I))
662 return IsPathToInnerIndVar(
I->getOperand(0)) &&
663 IsPathToInnerIndVar(
I->getOperand(1));
669 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
674 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
677 }
else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
700 if (
PHI->getNumIncomingValues() != 1)
708 if (isa<Constant>(V))
713 if (
PHI->getNumIncomingValues() == 1)
729bool LoopInterchangeLegality::findInductionAndReductions(
731 if (!
L->getLoopLatch() || !
L->getLoopPredecessor())
741 if (!OuterInnerReductions.count(&
PHI)) {
743 "across the outer loop.\n");
747 assert(
PHI.getNumIncomingValues() == 2 &&
748 "Phis in loop header should have exactly 2 incoming values");
757 <<
"Failed to recognize PHI as an induction or reduction.\n");
760 OuterInnerReductions.insert(&
PHI);
761 OuterInnerReductions.insert(InnerRedPhi);
770bool LoopInterchangeLegality::currentLimitations() {
780 dbgs() <<
"Loops where the latch is not the exiting block are not"
781 <<
" supported currently.\n");
786 <<
"Loops where the latch is not the exiting block cannot be"
787 " interchange currently.";
793 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
795 dbgs() <<
"Only outer loops with induction or reduction PHI nodes "
796 <<
"are supported currently.\n");
801 <<
"Only outer loops with induction or reduction PHI nodes can be"
802 " interchanged currently.";
811 Loop *CurLevelLoop = OuterLoop;
814 CurLevelLoop = CurLevelLoop->
getSubLoops().front();
815 if (!findInductionAndReductions(CurLevelLoop, Inductions,
nullptr)) {
817 dbgs() <<
"Only inner loops with induction or reduction PHI nodes "
818 <<
"are supported currently.\n");
823 <<
"Only inner loops with induction or reduction PHI nodes can be"
824 " interchange currently.";
831 if (!isLoopStructureUnderstood()) {
837 <<
"Inner loop structure not understood currently.";
845bool LoopInterchangeLegality::findInductions(
852 return !Inductions.
empty();
864 if (
PHI.getNumIncomingValues() > 1)
867 PHINode *PN = dyn_cast<PHINode>(U);
869 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
887 for (
unsigned i = 0; i <
PHI.getNumIncomingValues(); i++) {
888 Instruction *IncomingI = dyn_cast<Instruction>(
PHI.getIncomingValue(i));
934 for (
auto *U :
PHI.users()) {
943bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
944 unsigned OuterLoopId,
945 CharMatrix &DepMatrix) {
947 LLVM_DEBUG(
dbgs() <<
"Failed interchange InnerLoopId = " << InnerLoopId
948 <<
" and OuterLoopId = " << OuterLoopId
949 <<
" due to dependence\n");
954 <<
"Cannot interchange loops due to dependences.";
959 for (
auto *BB : OuterLoop->
blocks())
961 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
963 if (CI->onlyWritesMemory())
966 dbgs() <<
"Loops with call instructions cannot be interchanged "
972 <<
"Cannot interchange loops due to call instruction.";
978 if (!findInductions(InnerLoop, InnerLoopInductions)) {
979 LLVM_DEBUG(
dbgs() <<
"Could not find inner loop induction variables.\n");
984 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop latch.\n");
989 <<
"Cannot interchange loops because unsupported PHI nodes found "
990 "in inner loop latch.";
997 if (currentLimitations()) {
998 LLVM_DEBUG(
dbgs() <<
"Not legal because of current transform limitation\n");
1003 if (!tightlyNested(OuterLoop, InnerLoop)) {
1009 <<
"Cannot interchange loops because they are not tightly "
1016 OuterInnerReductions)) {
1017 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop exit.\n");
1022 <<
"Found unsupported PHI node in loop exit.";
1028 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in outer loop exit.\n");
1033 <<
"Found unsupported PHI node in loop exit.";
1041int LoopInterchangeProfitability::getInstrOrderCost() {
1042 unsigned GoodOrder, BadOrder;
1043 BadOrder = GoodOrder = 0;
1047 unsigned NumOp =
GEP->getNumOperands();
1048 bool FoundInnerInduction =
false;
1049 bool FoundOuterInduction =
false;
1050 for (
unsigned i = 0; i < NumOp; ++i) {
1065 if (AR->
getLoop() == InnerLoop) {
1068 FoundInnerInduction =
true;
1069 if (FoundOuterInduction) {
1079 if (AR->
getLoop() == OuterLoop) {
1082 FoundOuterInduction =
true;
1083 if (FoundInnerInduction) {
1092 return GoodOrder - BadOrder;
1096LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1098 std::unique_ptr<CacheCost> &
CC) {
1103 unsigned InnerIndex = 0, OuterIndex = 0;
1104 InnerIndex = CostMap.
find(InnerLoop)->second;
1105 OuterIndex = CostMap.
find(OuterLoop)->second;
1107 <<
", OuterIndex = " << OuterIndex <<
"\n");
1108 if (InnerIndex < OuterIndex)
1109 return std::optional<bool>(
true);
1110 assert(InnerIndex != OuterIndex &&
"CostMap should assign unique "
1111 "numbers to each loop");
1112 if (
CC->getLoopCost(*OuterLoop) ==
CC->getLoopCost(*InnerLoop))
1113 return std::nullopt;
1114 return std::optional<bool>(
false);
1116 return std::nullopt;
1120LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1124 int Cost = getInstrOrderCost();
1127 return std::optional<bool>(
true);
1129 return std::nullopt;
1132std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1133 unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix) {
1134 for (
auto &Row : DepMatrix) {
1138 if (Row[InnerLoopId] ==
'I' || Row[InnerLoopId] ==
'=')
1139 return std::optional<bool>(
false);
1144 if (Row[OuterLoopId] !=
'I' && Row[OuterLoopId] !=
'=')
1145 return std::optional<bool>(
false);
1150 return std::optional<bool>(!DepMatrix.empty());
1153bool LoopInterchangeProfitability::isProfitable(
1154 const Loop *InnerLoop,
const Loop *OuterLoop,
unsigned InnerLoopId,
1155 unsigned OuterLoopId, CharMatrix &DepMatrix,
1157 std::unique_ptr<CacheCost> &
CC) {
1166 std::optional<bool> shouldInterchange =
1167 isProfitablePerLoopCacheAnalysis(CostMap,
CC);
1168 if (!shouldInterchange.has_value()) {
1169 shouldInterchange = isProfitablePerInstrOrderCost();
1170 if (!shouldInterchange.has_value())
1172 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1174 if (!shouldInterchange.has_value()) {
1179 <<
"Insufficient information to calculate the cost of loop for "
1183 }
else if (!shouldInterchange.value()) {
1188 <<
"Interchanging loops is not considered to improve cache "
1189 "locality nor vectorization.";
1196void LoopInterchangeTransform::removeChildLoop(
Loop *OuterLoop,
1198 for (
Loop *L : *OuterLoop)
1199 if (L == InnerLoop) {
1200 OuterLoop->removeChildLoop(L);
1229void LoopInterchangeTransform::restructureLoops(
1239 if (OuterLoopParent) {
1241 removeChildLoop(OuterLoopParent, NewInner);
1242 removeChildLoop(NewInner, NewOuter);
1245 removeChildLoop(NewInner, NewOuter);
1270 if (BB == OuterHeader || BB == OuterLatch)
1285bool LoopInterchangeTransform::transform() {
1286 bool Transformed =
false;
1291 auto &InductionPHIs = LIL.getInnerLoopInductions();
1292 if (InductionPHIs.empty()) {
1293 LLVM_DEBUG(
dbgs() <<
"Failed to find the point to split loop latch \n");
1298 for (
PHINode *CurInductionPHI : InductionPHIs) {
1299 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1301 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1304 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1317 auto MoveInstructions = [&i, &WorkList,
this, &InductionPHIs, NewLatch]() {
1318 for (; i < WorkList.
size(); i++) {
1324 "Moving instructions with side-effects may change behavior of "
1335 for (
Value *
Op : WorkList[i]->operands()) {
1353 for (
Instruction *InnerIndexVar : InnerIndexVarList)
1354 WorkList.
insert(cast<Instruction>(InnerIndexVar));
1371 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1372 if (InnerLoopPreHeader != OuterLoopHeader) {
1376 std::prev(InnerLoopPreHeader->
end()))))
1380 Transformed |= adjustLoopLinks();
1405 I->removeFromParent();
1420 std::vector<DominatorTree::UpdateType> &DTUpdates,
1421 bool MustUpdateOnce =
true) {
1422 assert((!MustUpdateOnce ||
1426 }) == 1) &&
"BI must jump to OldBB exactly once.");
1427 bool Changed =
false;
1435 DTUpdates.push_back(
1436 {DominatorTree::UpdateKind::Insert, BI->
getParent(), NewBB});
1437 DTUpdates.push_back(
1438 {DominatorTree::UpdateKind::Delete, BI->
getParent(), OldBB});
1440 assert(Changed &&
"Expected a successor to be updated");
1457 assert(
P.getNumIncomingValues() == 1 &&
1458 "Only loops with a single exit are supported!");
1461 auto IncI = cast<Instruction>(
P.getIncomingValueForBlock(InnerLatch));
1464 auto IncIInnerMost = cast<Instruction>(
followLCSSA(IncI));
1467 if (IncIInnerMost->getParent() != InnerLatch &&
1468 IncIInnerMost->
getParent() != InnerHeader)
1472 [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
1473 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1474 IncI->getParent() == InnerHeader) ||
1475 cast<PHINode>(U)->getParent() == OuterExit;
1477 "Can only replace phis iff the uses are in the loop nest exit or "
1478 "the incoming value is defined in the inner header (it will "
1479 "dominate all loop blocks after interchanging)");
1480 P.replaceAllUsesWith(IncI);
1481 P.eraseFromParent();
1511 if (
P.getNumIncomingValues() != 1)
1515 auto I = dyn_cast<Instruction>(
P.getIncomingValue(0));
1519 PHINode *NewPhi = dyn_cast<PHINode>(
P.clone());
1525 if (Pred == OuterLatch)
1530 P.setIncomingValue(0, NewPhi);
1540bool LoopInterchangeTransform::adjustLoopBranches() {
1542 std::vector<DominatorTree::UpdateType> DTUpdates;
1544 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1547 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1548 InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
1549 InnerLoopPreHeader &&
"Guaranteed by loop-simplify form");
1554 if (isa<PHINode>(OuterLoopPreHeader->
begin()) ||
1556 OuterLoopPreHeader =
1558 if (InnerLoopPreHeader == OuterLoop->getHeader())
1559 InnerLoopPreHeader =
1564 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1566 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1582 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1583 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1588 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->
getTerminator());
1590 dyn_cast<BranchInst>(OuterLoopPredecessor->
getTerminator());
1592 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1595 if (!InnerLoopHeaderSuccessor)
1603 InnerLoopPreHeader, DTUpdates,
false);
1613 InnerLoopHeaderSuccessor, DTUpdates,
1621 OuterLoopPreHeader, DTUpdates);
1624 if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
1625 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
1627 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
1630 InnerLoopLatchSuccessor, DTUpdates);
1632 if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
1633 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
1635 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
1638 OuterLoopLatchSuccessor, DTUpdates);
1639 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1643 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1644 OuterLoopPreHeader);
1646 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1647 OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
1653 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1657 if (OuterInnerReductions.contains(&
PHI))
1661 if (OuterInnerReductions.contains(&
PHI))
1670 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
1675 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
1697bool LoopInterchangeTransform::adjustLoopLinks() {
1699 bool Changed = adjustLoopBranches();
1704 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1718 std::unique_ptr<CacheCost>
CC =
1721 if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT,
CC, &ORE).run(LN))
1723 U.markLoopNestChanged(
true);
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
This file defines the interface for the loop cache analysis.
Loop::LoopBounds::Direction Direction
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static const unsigned MaxLoopNestDepth
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
static const unsigned MaxMemInstrCount
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static bool isLexicographicallyPositive(std::vector< char > &DV)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
DependenceInfo - This class is the main dependence-analysis driver.
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
This node represents a polynomial recurrence on the trip count of the specified loop.
const Loop * getLoop() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto successors(const MachineBasicBlock *BB)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto map_range(ContainerTy &&C, FuncTy F)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.