52#define DEBUG_TYPE "loop-interchange"
54STATISTIC(LoopsInterchanged,
"Number of loops interchanged");
58 cl::desc(
"Interchange if you gain more than this number"));
64 "Maximum number of load-store instructions that should be handled "
65 "in the dependency matrix. Higher value may lead to more interchanges "
66 "at the cost of compile-time"));
73using CharMatrix = std::vector<std::vector<char>>;
80 cl::desc(
"Minimum depth of loop nest considered for the transform"));
85 cl::desc(
"Maximum depth of loop nest considered for the transform"));
89 for (
auto &Row : DepMatrix) {
103 ValueVector MemInstr;
109 if (!isa<Instruction>(
I))
111 if (
auto *Ld = dyn_cast<LoadInst>(&
I)) {
115 }
else if (
auto *St = dyn_cast<StoreInst>(&
I)) {
118 MemInstr.push_back(&
I);
124 <<
" Loads and Stores to analyze\n");
130 L->getStartLoc(), L->getHeader())
131 <<
"Number of loads/stores exceeded, the supported maximum "
132 "can be increased with option "
133 "-loop-interchange-maxmeminstr-count.";
137 ValueVector::iterator
I, IE, J, JE;
140 for (
I = MemInstr.begin(), IE = MemInstr.end();
I != IE; ++
I) {
141 for (J =
I, JE = MemInstr.end(); J != JE; ++J) {
142 std::vector<char> Dep;
146 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
149 if (
auto D = DI->
depends(Src, Dst,
true)) {
150 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
153 if (
D->normalize(SE))
156 D->isFlow() ?
"flow" :
D->isAnti() ?
"anti" :
"output";
157 dbgs() <<
"Found " << DepType
158 <<
" dependency between Src and Dst\n"
159 <<
" Src:" << *Src <<
"\n Dst:" << *Dst <<
'\n');
160 unsigned Levels =
D->getLevels();
162 for (
unsigned II = 1;
II <= Levels; ++
II) {
163 unsigned Dir =
D->getDirection(
II);
175 while (Dep.size() != Level) {
181 DepMatrix.push_back(Dep);
193 for (
unsigned I = 0, E = DepMatrix.size();
I < E; ++
I)
194 std::swap(DepMatrix[
I][ToIndx], DepMatrix[
I][FromIndx]);
213 unsigned InnerLoopId,
214 unsigned OuterLoopId) {
215 unsigned NumRows = DepMatrix.size();
216 std::vector<char> Cur;
218 for (
unsigned Row = 0; Row < NumRows; ++Row) {
221 Cur = DepMatrix[Row];
224 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
233 << L.getHeader()->getParent()->getName() <<
" Loop: %"
234 << L.getHeader()->getName() <<
'\n');
235 assert(LoopList.empty() &&
"LoopList should initially be empty!");
236 Loop *CurrentLoop = &L;
237 const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
238 while (!Vec->empty()) {
242 if (Vec->size() != 1) {
247 LoopList.push_back(CurrentLoop);
248 CurrentLoop = Vec->front();
251 LoopList.push_back(CurrentLoop);
256 unsigned LoopNestDepth = LoopList.
size();
258 LLVM_DEBUG(
dbgs() <<
"Unsupported depth of loop nest " << LoopNestDepth
264 (*OuterLoop)->getStartLoc(),
265 (*OuterLoop)->getHeader())
266 <<
"Unsupported depth of loop nest, the supported range is ["
277class LoopInterchangeLegality {
281 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
284 bool canInterchangeLoops(
unsigned InnerLoopId,
unsigned OuterLoopId,
285 CharMatrix &DepMatrix);
293 bool isLoopStructureUnderstood();
295 bool currentLimitations();
298 return OuterInnerReductions;
302 return InnerLoopInductions;
306 bool tightlyNested(
Loop *Outer,
Loop *Inner);
307 bool containsUnsafeInstructions(
BasicBlock *BB);
313 bool findInductionAndReductions(
Loop *L,
335class LoopInterchangeProfitability {
339 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
343 unsigned InnerLoopId,
unsigned OuterLoopId,
344 CharMatrix &DepMatrix,
346 std::unique_ptr<CacheCost> &
CC);
349 int getInstrOrderCost();
350 std::optional<bool> isProfitablePerLoopCacheAnalysis(
352 std::unique_ptr<CacheCost> &
CC);
353 std::optional<bool> isProfitablePerInstrOrderCost();
354 std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
355 unsigned OuterLoopId,
356 CharMatrix &DepMatrix);
368class LoopInterchangeTransform {
372 const LoopInterchangeLegality &LIL)
373 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
377 void restructureLoops(
Loop *NewInner,
Loop *NewOuter,
380 void removeChildLoop(
Loop *OuterLoop,
Loop *InnerLoop);
383 bool adjustLoopLinks();
384 bool adjustLoopBranches();
395 const LoopInterchangeLegality &LIL;
398struct LoopInterchange {
403 std::unique_ptr<CacheCost>
CC =
nullptr;
411 : SE(SE), LI(LI), DI(DI), DT(DT),
CC(
std::
move(
CC)), ORE(ORE) {}
414 if (
L->getParentLoop())
418 return processLoopList(LoopList);
423 for (
unsigned I = 1;
I < LoopList.size(); ++
I)
424 if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
426 return processLoopList(LoopList);
430 for (
Loop *L : LoopList) {
432 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
436 if (
L->getNumBackEdges() != 1) {
440 if (!
L->getExitingBlock()) {
451 return LoopList.
size() - 1;
455 bool Changed =
false;
459 "Unsupported depth of loop nest.");
461 unsigned LoopNestDepth = LoopList.
size();
462 if (!isComputableLoopNest(LoopList)) {
463 LLVM_DEBUG(
dbgs() <<
"Not valid loop candidate for interchange\n");
467 LLVM_DEBUG(
dbgs() <<
"Processing LoopList of size = " << LoopNestDepth
470 CharMatrix DependencyMatrix;
471 Loop *OuterMostLoop = *(LoopList.
begin());
473 OuterMostLoop, DI, SE, ORE)) {
488 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
499 const auto &LoopCosts =
CC->getLoopCosts();
500 for (
unsigned i = 0; i < LoopCosts.size(); i++) {
501 CostMap[LoopCosts[i].first] = i;
508 for (
unsigned j = SelecLoopId;
j > 0;
j--) {
509 bool ChangedPerIter =
false;
510 for (
unsigned i = SelecLoopId; i > SelecLoopId -
j; i--) {
511 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
512 DependencyMatrix, CostMap);
523 ChangedPerIter |= Interchanged;
524 Changed |= Interchanged;
534 bool processLoop(
Loop *InnerLoop,
Loop *OuterLoop,
unsigned InnerLoopId,
535 unsigned OuterLoopId,
536 std::vector<std::vector<char>> &DependencyMatrix,
539 <<
" and OuterLoopId = " << OuterLoopId <<
"\n");
540 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
541 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
542 LLVM_DEBUG(
dbgs() <<
"Not interchanging loops. Cannot prove legality.\n");
546 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
547 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
548 DependencyMatrix, CostMap, CC)) {
557 <<
"Loop interchanged with enclosing loop.";
560 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
572bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB) {
574 return I.mayHaveSideEffects() ||
I.mayReadFromMemory();
578bool LoopInterchangeLegality::tightlyNested(
Loop *OuterLoop,
Loop *InnerLoop) {
590 if (!OuterLoopHeaderBI)
594 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader() &&
595 Succ != OuterLoopLatch)
598 LLVM_DEBUG(
dbgs() <<
"Checking instructions in Loop header and Loop latch\n");
601 if (containsUnsafeInstructions(OuterLoopHeader) ||
602 containsUnsafeInstructions(OuterLoopLatch))
608 if (InnerLoopPreHeader != OuterLoopHeader &&
609 containsUnsafeInstructions(InnerLoopPreHeader))
617 if (&SuccInner != OuterLoopLatch) {
619 <<
" does not lead to the outer loop latch.\n";);
625 if (containsUnsafeInstructions(InnerLoopExit))
633bool LoopInterchangeLegality::isLoopStructureUnderstood() {
635 for (
PHINode *InnerInduction : InnerLoopInductions) {
636 unsigned Num = InnerInduction->getNumOperands();
637 for (
unsigned i = 0; i < Num; ++i) {
638 Value *Val = InnerInduction->getOperand(i);
639 if (isa<Constant>(Val))
648 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
649 InnerLoopPreheader &&
669 Value *Op0 = InnerLoopCmp->getOperand(0);
670 Value *Op1 = InnerLoopCmp->getOperand(1);
681 std::function<
bool(
Value *)> IsPathToInnerIndVar;
682 IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) ->
bool {
685 if (isa<Constant>(V))
690 if (isa<CastInst>(
I))
691 return IsPathToInnerIndVar(
I->getOperand(0));
692 if (isa<BinaryOperator>(
I))
693 return IsPathToInnerIndVar(
I->getOperand(0)) &&
694 IsPathToInnerIndVar(
I->getOperand(1));
700 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
705 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
708 }
else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
731 if (
PHI->getNumIncomingValues() != 1)
739 if (isa<Constant>(V))
744 if (
PHI->getNumIncomingValues() == 1)
760bool LoopInterchangeLegality::findInductionAndReductions(
762 if (!
L->getLoopLatch() || !
L->getLoopPredecessor())
772 if (!OuterInnerReductions.count(&
PHI)) {
774 "across the outer loop.\n");
778 assert(
PHI.getNumIncomingValues() == 2 &&
779 "Phis in loop header should have exactly 2 incoming values");
788 <<
"Failed to recognize PHI as an induction or reduction.\n");
791 OuterInnerReductions.insert(&
PHI);
792 OuterInnerReductions.insert(InnerRedPhi);
801bool LoopInterchangeLegality::currentLimitations() {
811 dbgs() <<
"Loops where the latch is not the exiting block are not"
812 <<
" supported currently.\n");
817 <<
"Loops where the latch is not the exiting block cannot be"
818 " interchange currently.";
824 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
826 dbgs() <<
"Only outer loops with induction or reduction PHI nodes "
827 <<
"are supported currently.\n");
832 <<
"Only outer loops with induction or reduction PHI nodes can be"
833 " interchanged currently.";
842 Loop *CurLevelLoop = OuterLoop;
845 CurLevelLoop = CurLevelLoop->
getSubLoops().front();
846 if (!findInductionAndReductions(CurLevelLoop, Inductions,
nullptr)) {
848 dbgs() <<
"Only inner loops with induction or reduction PHI nodes "
849 <<
"are supported currently.\n");
854 <<
"Only inner loops with induction or reduction PHI nodes can be"
855 " interchange currently.";
862 if (!isLoopStructureUnderstood()) {
868 <<
"Inner loop structure not understood currently.";
876bool LoopInterchangeLegality::findInductions(
883 return !Inductions.
empty();
895 if (
PHI.getNumIncomingValues() > 1)
898 PHINode *PN = dyn_cast<PHINode>(U);
900 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
918 for (
unsigned i = 0; i <
PHI.getNumIncomingValues(); i++) {
919 Instruction *IncomingI = dyn_cast<Instruction>(
PHI.getIncomingValue(i));
965 for (
auto *U :
PHI.users()) {
974bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
975 unsigned OuterLoopId,
976 CharMatrix &DepMatrix) {
978 LLVM_DEBUG(
dbgs() <<
"Failed interchange InnerLoopId = " << InnerLoopId
979 <<
" and OuterLoopId = " << OuterLoopId
980 <<
" due to dependence\n");
985 <<
"Cannot interchange loops due to dependences.";
990 for (
auto *BB : OuterLoop->
blocks())
992 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
994 if (CI->onlyWritesMemory())
997 dbgs() <<
"Loops with call instructions cannot be interchanged "
1003 <<
"Cannot interchange loops due to call instruction.";
1009 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1010 LLVM_DEBUG(
dbgs() <<
"Could not find inner loop induction variables.\n");
1015 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop latch.\n");
1020 <<
"Cannot interchange loops because unsupported PHI nodes found "
1021 "in inner loop latch.";
1028 if (currentLimitations()) {
1029 LLVM_DEBUG(
dbgs() <<
"Not legal because of current transform limitation\n");
1034 if (!tightlyNested(OuterLoop, InnerLoop)) {
1040 <<
"Cannot interchange loops because they are not tightly "
1047 OuterInnerReductions)) {
1048 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop exit.\n");
1053 <<
"Found unsupported PHI node in loop exit.";
1059 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in outer loop exit.\n");
1064 <<
"Found unsupported PHI node in loop exit.";
1072int LoopInterchangeProfitability::getInstrOrderCost() {
1073 unsigned GoodOrder, BadOrder;
1074 BadOrder = GoodOrder = 0;
1078 unsigned NumOp =
GEP->getNumOperands();
1079 bool FoundInnerInduction =
false;
1080 bool FoundOuterInduction =
false;
1081 for (
unsigned i = 0; i < NumOp; ++i) {
1096 if (AR->
getLoop() == InnerLoop) {
1099 FoundInnerInduction =
true;
1100 if (FoundOuterInduction) {
1110 if (AR->
getLoop() == OuterLoop) {
1113 FoundOuterInduction =
true;
1114 if (FoundInnerInduction) {
1123 return GoodOrder - BadOrder;
1127LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1129 std::unique_ptr<CacheCost> &
CC) {
1134 unsigned InnerIndex = 0, OuterIndex = 0;
1135 InnerIndex = CostMap.
find(InnerLoop)->second;
1136 OuterIndex = CostMap.
find(OuterLoop)->second;
1138 <<
", OuterIndex = " << OuterIndex <<
"\n");
1139 if (InnerIndex < OuterIndex)
1140 return std::optional<bool>(
true);
1141 assert(InnerIndex != OuterIndex &&
"CostMap should assign unique "
1142 "numbers to each loop");
1143 if (
CC->getLoopCost(*OuterLoop) ==
CC->getLoopCost(*InnerLoop))
1144 return std::nullopt;
1145 return std::optional<bool>(
false);
1147 return std::nullopt;
1151LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1155 int Cost = getInstrOrderCost();
1158 return std::optional<bool>(
true);
1160 return std::nullopt;
1163std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1164 unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix) {
1165 for (
auto &Row : DepMatrix) {
1169 if (Row[InnerLoopId] ==
'I' || Row[InnerLoopId] ==
'=')
1170 return std::optional<bool>(
false);
1175 if (Row[OuterLoopId] !=
'I' && Row[OuterLoopId] !=
'=')
1176 return std::optional<bool>(
false);
1181 return std::optional<bool>(!DepMatrix.empty());
1184bool LoopInterchangeProfitability::isProfitable(
1185 const Loop *InnerLoop,
const Loop *OuterLoop,
unsigned InnerLoopId,
1186 unsigned OuterLoopId, CharMatrix &DepMatrix,
1188 std::unique_ptr<CacheCost> &
CC) {
1197 std::optional<bool> shouldInterchange =
1198 isProfitablePerLoopCacheAnalysis(CostMap,
CC);
1199 if (!shouldInterchange.has_value()) {
1200 shouldInterchange = isProfitablePerInstrOrderCost();
1201 if (!shouldInterchange.has_value())
1203 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1205 if (!shouldInterchange.has_value()) {
1210 <<
"Insufficient information to calculate the cost of loop for "
1214 }
else if (!shouldInterchange.value()) {
1219 <<
"Interchanging loops is not considered to improve cache "
1220 "locality nor vectorization.";
1227void LoopInterchangeTransform::removeChildLoop(
Loop *OuterLoop,
1229 for (
Loop *L : *OuterLoop)
1230 if (L == InnerLoop) {
1231 OuterLoop->removeChildLoop(L);
1260void LoopInterchangeTransform::restructureLoops(
1270 if (OuterLoopParent) {
1272 removeChildLoop(OuterLoopParent, NewInner);
1273 removeChildLoop(NewInner, NewOuter);
1276 removeChildLoop(NewInner, NewOuter);
1301 if (BB == OuterHeader || BB == OuterLatch)
1316bool LoopInterchangeTransform::transform() {
1317 bool Transformed =
false;
1322 auto &InductionPHIs = LIL.getInnerLoopInductions();
1323 if (InductionPHIs.empty()) {
1324 LLVM_DEBUG(
dbgs() <<
"Failed to find the point to split loop latch \n");
1329 for (
PHINode *CurInductionPHI : InductionPHIs) {
1330 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1332 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1335 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1348 auto MoveInstructions = [&i, &WorkList,
this, &InductionPHIs, NewLatch]() {
1349 for (; i < WorkList.
size(); i++) {
1355 "Moving instructions with side-effects may change behavior of "
1366 for (
Value *
Op : WorkList[i]->operands()) {
1384 for (
Instruction *InnerIndexVar : InnerIndexVarList)
1385 WorkList.
insert(cast<Instruction>(InnerIndexVar));
1402 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1403 if (InnerLoopPreHeader != OuterLoopHeader) {
1407 std::prev(InnerLoopPreHeader->
end()))))
1411 Transformed |= adjustLoopLinks();
1436 I->removeFromParent();
1451 std::vector<DominatorTree::UpdateType> &DTUpdates,
1452 bool MustUpdateOnce =
true) {
1453 assert((!MustUpdateOnce ||
1457 }) == 1) &&
"BI must jump to OldBB exactly once.");
1458 bool Changed =
false;
1466 DTUpdates.push_back(
1467 {DominatorTree::UpdateKind::Insert, BI->
getParent(), NewBB});
1468 DTUpdates.push_back(
1469 {DominatorTree::UpdateKind::Delete, BI->
getParent(), OldBB});
1471 assert(Changed &&
"Expected a successor to be updated");
1488 assert(
P.getNumIncomingValues() == 1 &&
1489 "Only loops with a single exit are supported!");
1492 auto IncI = cast<Instruction>(
P.getIncomingValueForBlock(InnerLatch));
1495 auto IncIInnerMost = cast<Instruction>(
followLCSSA(IncI));
1498 if (IncIInnerMost->getParent() != InnerLatch &&
1499 IncIInnerMost->
getParent() != InnerHeader)
1503 [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
1504 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1505 IncI->getParent() == InnerHeader) ||
1506 cast<PHINode>(U)->getParent() == OuterExit;
1508 "Can only replace phis iff the uses are in the loop nest exit or "
1509 "the incoming value is defined in the inner header (it will "
1510 "dominate all loop blocks after interchanging)");
1511 P.replaceAllUsesWith(IncI);
1512 P.eraseFromParent();
1542 if (
P.getNumIncomingValues() != 1)
1546 auto I = dyn_cast<Instruction>(
P.getIncomingValue(0));
1550 PHINode *NewPhi = dyn_cast<PHINode>(
P.clone());
1556 if (Pred == OuterLatch)
1561 P.setIncomingValue(0, NewPhi);
1571bool LoopInterchangeTransform::adjustLoopBranches() {
1573 std::vector<DominatorTree::UpdateType> DTUpdates;
1575 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1578 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1579 InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
1580 InnerLoopPreHeader &&
"Guaranteed by loop-simplify form");
1585 if (isa<PHINode>(OuterLoopPreHeader->
begin()) ||
1587 OuterLoopPreHeader =
1589 if (InnerLoopPreHeader == OuterLoop->getHeader())
1590 InnerLoopPreHeader =
1595 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1597 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1613 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1614 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1619 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->
getTerminator());
1621 dyn_cast<BranchInst>(OuterLoopPredecessor->
getTerminator());
1623 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1626 if (!InnerLoopHeaderSuccessor)
1634 InnerLoopPreHeader, DTUpdates,
false);
1644 InnerLoopHeaderSuccessor, DTUpdates,
1652 OuterLoopPreHeader, DTUpdates);
1655 if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
1656 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
1658 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
1661 InnerLoopLatchSuccessor, DTUpdates);
1663 if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
1664 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
1666 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
1669 OuterLoopLatchSuccessor, DTUpdates);
1670 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1674 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1675 OuterLoopPreHeader);
1677 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1678 OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
1684 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1688 if (OuterInnerReductions.contains(&
PHI))
1692 if (OuterInnerReductions.contains(&
PHI))
1701 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
1706 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
1728bool LoopInterchangeTransform::adjustLoopLinks() {
1730 bool Changed = adjustLoopBranches();
1735 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1759 std::unique_ptr<CacheCost>
CC =
1762 if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT,
CC, &ORE).run(LN))
1764 U.markLoopNestChanged(
true);
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
This file defines the interface for the loop cache analysis.
static cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time"))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(SmallVectorImpl< Loop * > &LoopList, OptimizationRemarkEmitter &ORE)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static void printDepMatrix(CharMatrix &DepMatrix)
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static bool isLexicographicallyPositive(std::vector< char > &DV)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
static bool isProfitable(const SmallVector< std::unique_ptr< StableFunctionMap::StableFunctionEntry > > &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
StringSet - A set-like wrapper for the StringMap.
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
DependenceInfo - This class is the main dependence-analysis driver.
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
This node represents a polynomial recurrence on the trip count of the specified loop.
const Loop * getLoop() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
StringSet - A wrapper for StringMap that provides set-like functionality.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto successors(const MachineBasicBlock *BB)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto map_range(ContainerTy &&C, FuncTy F)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.