55#define DEBUG_TYPE "loop-interchange"
57STATISTIC(LoopsInterchanged,
"Number of loops interchanged");
61 cl::desc(
"Interchange if you gain more than this number"));
65 cl::desc(
"Maximum number of load/store instructions squared in relation to "
66 "the total number of instructions. Higher value may lead to more "
67 "interchanges at the cost of compile-time"));
81using CharMatrix = std::vector<std::vector<char>>;
96 cl::desc(
"Minimum depth of loop nest considered for the transform"));
101 cl::desc(
"Maximum depth of loop nest considered for the transform"));
107 cl::desc(
"List of profitability heuristics to be used. They are applied in "
110 RuleTy::ForVectorization}),
112 "Prioritize loop cache cost"),
113 clEnumValN(RuleTy::PerInstrOrderCost,
"instorder",
114 "Prioritize the IVs order of each instruction"),
115 clEnumValN(RuleTy::ForVectorization,
"vectorize",
116 "Prioritize vectorization"),
118 "Ignore profitability, force interchange (does not "
119 "work with other options)")));
124 cl::desc(
"Support for the inner-loop reduction pattern."));
129 for (RuleTy Rule : Rules) {
130 if (!Set.insert(Rule).second)
132 if (Rule == RuleTy::Ignore)
139 for (
auto &Row : DepMatrix) {
152 assert(Src->getParent() == Dst->getParent() && Src != Dst &&
153 "Expected Src and Dst to be different instructions in the same BB");
155 bool FoundSrc =
false;
176 unsigned NumInsts = 0;
204 unsigned NumMemInstr = MemInstr.
size();
206 <<
" Loads and Stores to analyze\n");
210 L->getStartLoc(), L->getHeader())
211 <<
"Number of loads/stores exceeded, the supported maximum can be "
212 "increased with option -loop-interchange-max-mem-instr-ratio.";
222 for (
I = MemInstr.
begin(), IE = MemInstr.
end();
I != IE; ++
I) {
223 for (J =
I, JE = MemInstr.
end(); J != JE; ++J) {
224 std::vector<char> Dep;
231 if (
auto D = DI->
depends(Src, Dst)) {
232 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
235 if (
D->normalize(SE))
238 D->isFlow() ?
"flow" :
D->isAnti() ?
"anti" :
"output";
239 dbgs() <<
"Found " << DepType
240 <<
" dependency between Src and Dst\n"
241 <<
" Src:" << *Src <<
"\n Dst:" << *Dst <<
'\n');
242 unsigned Levels =
D->getLevels();
244 for (
unsigned II = 1;
II <= Levels; ++
II) {
251 unsigned Dir =
D->getDirection(
II);
265 if (
D->isConfused()) {
266 assert(Dep.empty() &&
"Expected empty dependency vector");
267 Dep.assign(Level,
'*');
270 while (Dep.size() != Level) {
279 L->getStartLoc(), L->getHeader())
280 <<
"All loops have dependencies in all directions.";
286 bool IsKnownForward =
true;
287 if (Src->getParent() != Dst->getParent()) {
291 IsKnownForward =
false;
297 "Unexpected instructions");
302 bool IsReversed =
D->getSrc() != Src;
304 IsKnownForward =
false;
320 DepMatrix.push_back(Dep);
327 DepMatrix[Ite->second].back() =
'*';
339 for (
auto &Row : DepMatrix)
348static std::optional<bool>
361 unsigned InnerLoopId,
362 unsigned OuterLoopId) {
363 unsigned NumRows = DepMatrix.size();
364 std::vector<char> Cur;
366 for (
unsigned Row = 0; Row < NumRows; ++Row) {
369 Cur = DepMatrix[Row];
382 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
391 << L.getHeader()->getParent()->getName() <<
" Loop: %"
392 << L.getHeader()->getName() <<
'\n');
393 assert(LoopList.
empty() &&
"LoopList should initially be empty!");
394 Loop *CurrentLoop = &L;
395 const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
396 while (!Vec->empty()) {
400 if (Vec->size() != 1) {
406 CurrentLoop = Vec->front();
414 unsigned LoopNestDepth = LoopList.
size();
416 LLVM_DEBUG(
dbgs() <<
"Unsupported depth of loop nest " << LoopNestDepth
424 <<
"Unsupported depth of loop nest, the supported range is ["
435 for (
Loop *L : LoopList) {
441 if (L->getNumBackEdges() != 1) {
445 if (!L->getExitingBlock()) {
456class LoopInterchangeLegality {
458 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
459 OptimizationRemarkEmitter *ORE, DominatorTree *DT)
460 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), DT(DT), ORE(ORE) {}
463 bool canInterchangeLoops(
unsigned InnerLoopId,
unsigned OuterLoopId,
464 CharMatrix &DepMatrix);
468 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
472 bool isLoopStructureUnderstood();
474 bool currentLimitations();
476 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions()
const {
477 return OuterInnerReductions;
481 return InnerLoopInductions;
485 return HasNoWrapReductions;
494 struct InnerReduction {
502 StoreInst *LcssaStore;
509 return InnerReductions;
513 bool tightlyNested(Loop *Outer, Loop *Inner);
514 bool containsUnsafeInstructions(BasicBlock *BB, Instruction *Skip);
520 bool findInductionAndReductions(Loop *L,
534 bool isInnerReduction(Loop *L, PHINode *Phi,
535 SmallVectorImpl<Instruction *> &HasNoWrapInsts);
544 OptimizationRemarkEmitter *ORE;
548 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
556 SmallVector<Instruction *, 4> HasNoWrapReductions;
560 SmallVector<Instruction *, 4> HasNoInfInsts;
569class CacheCostManager {
571 LoopStandardAnalysisResults *AR;
576 std::optional<std::unique_ptr<CacheCost>> CC;
580 DenseMap<const Loop *, unsigned> CostMap;
582 void computeIfUnitinialized();
585 CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
587 : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
588 CacheCost *getCacheCost();
589 const DenseMap<const Loop *, unsigned> &getCostMap();
594class LoopInterchangeProfitability {
596 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
597 OptimizationRemarkEmitter *ORE)
598 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
601 bool isProfitable(
const Loop *InnerLoop,
const Loop *OuterLoop,
602 unsigned InnerLoopId,
unsigned OuterLoopId,
603 CharMatrix &DepMatrix, CacheCostManager &CCM);
606 int getInstrOrderCost();
607 std::optional<bool> isProfitablePerLoopCacheAnalysis(
608 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
609 std::optional<bool> isProfitablePerInstrOrderCost();
610 std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
611 unsigned OuterLoopId,
612 CharMatrix &DepMatrix);
620 OptimizationRemarkEmitter *ORE;
624class LoopInterchangeTransform {
626 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
627 LoopInfo *LI, DominatorTree *DT,
628 const LoopInterchangeLegality &LIL)
629 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
634 void reduction2Memory();
635 void restructureLoops(Loop *NewInner, Loop *NewOuter,
636 BasicBlock *OrigInnerPreHeader,
637 BasicBlock *OrigOuterPreHeader);
638 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
641 bool adjustLoopLinks();
642 bool adjustLoopBranches();
653 const LoopInterchangeLegality &LIL;
656struct LoopInterchange {
657 ScalarEvolution *SE =
nullptr;
658 LoopInfo *LI =
nullptr;
659 DependenceInfo *DI =
nullptr;
660 DominatorTree *DT =
nullptr;
661 LoopStandardAnalysisResults *AR =
nullptr;
664 OptimizationRemarkEmitter *ORE;
666 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
667 DominatorTree *DT, LoopStandardAnalysisResults *AR,
668 OptimizationRemarkEmitter *ORE)
669 : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
672 if (
L->getParentLoop())
674 SmallVector<Loop *, 8> LoopList;
676 return processLoopList(LoopList);
679 bool run(LoopNest &LN) {
680 SmallVector<Loop *, 8> LoopList(LN.
getLoops());
681 for (
unsigned I = 1;
I < LoopList.size(); ++
I)
682 if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
684 return processLoopList(LoopList);
690 return LoopList.
size() - 1;
693 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
698 "Unsupported depth of loop nest.");
700 unsigned LoopNestDepth = LoopList.
size();
703 dbgs() <<
"Processing LoopList of size = " << LoopNestDepth
704 <<
" containing the following loops:\n";
705 for (
auto *L : LoopList) {
711 CharMatrix DependencyMatrix;
712 Loop *OuterMostLoop = *(LoopList.begin());
714 OuterMostLoop, DI, SE, ORE)) {
726 <<
"' needs an unique exit block");
730 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
731 CacheCostManager CCM(LoopList[0], AR, DI);
736 for (
unsigned j = SelecLoopId;
j > 0;
j--) {
737 bool ChangedPerIter =
false;
738 for (
unsigned i = SelecLoopId; i > SelecLoopId -
j; i--) {
740 processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
741 ChangedPerIter |= Interchanged;
752 bool processLoop(SmallVectorImpl<Loop *> &LoopList,
unsigned InnerLoopId,
753 unsigned OuterLoopId,
754 std::vector<std::vector<char>> &DependencyMatrix,
755 CacheCostManager &CCM) {
756 Loop *OuterLoop = LoopList[OuterLoopId];
757 Loop *InnerLoop = LoopList[InnerLoopId];
759 <<
" and OuterLoopId = " << OuterLoopId <<
"\n");
760 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE, DT);
761 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
762 LLVM_DEBUG(
dbgs() <<
"Cannot prove legality, not interchanging loops '"
763 << OuterLoop->
getName() <<
"' and '"
764 << InnerLoop->
getName() <<
"'\n");
769 <<
"' are legal to interchange\n");
770 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
771 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
772 DependencyMatrix, CCM)) {
774 <<
"' and '" << InnerLoop->
getName()
775 <<
"' not profitable.\n");
780 return OptimizationRemark(
DEBUG_TYPE,
"Interchanged",
783 <<
"Loop interchanged with enclosing loop.";
786 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
787 LIT.transform(LIL.getHasNoWrapReductions(), LIL.getHasNoInfInsts());
789 << OuterLoop->
getName() <<
"' and inner loop '"
790 << InnerLoop->
getName() <<
"'\n");
796 std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
809bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB,
811 return any_of(*BB, [Skip](
const Instruction &
I) {
814 return I.mayHaveSideEffects() ||
I.mayReadFromMemory();
818bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
824 <<
"' and '" << InnerLoop->
getName()
825 <<
"' are tightly nested\n");
845 for (BasicBlock *Succ :
successors(OuterLoopHeader))
846 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader())
849 LLVM_DEBUG(
dbgs() <<
"Checking instructions in Loop header and Loop latch\n");
855 assert(InnerReductions.size() <= 1 &&
856 "So far we only support at most one reduction.");
857 if (InnerReductions.size() == 1)
858 Skip = InnerReductions[0].LcssaStore;
862 if (containsUnsafeInstructions(OuterLoopHeader, Skip) ||
863 containsUnsafeInstructions(OuterLoopLatch, Skip))
869 if (InnerLoopPreHeader != OuterLoopHeader &&
870 containsUnsafeInstructions(InnerLoopPreHeader, Skip))
878 if (&SuccInner != OuterLoopLatch) {
880 <<
" does not lead to the outer loop latch.\n";);
886 if (containsUnsafeInstructions(InnerLoopExit, Skip))
894bool LoopInterchangeLegality::isLoopStructureUnderstood() {
896 for (PHINode *InnerInduction : InnerLoopInductions) {
897 unsigned Num = InnerInduction->getNumOperands();
898 for (
unsigned i = 0; i < Num; ++i) {
899 Value *Val = InnerInduction->getOperand(i);
909 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
910 InnerLoopPreheader &&
924 CondBrInst *InnerLoopLatchBI =
926 if (!InnerLoopLatchBI)
928 if (CmpInst *InnerLoopCmp =
930 Value *Op0 = InnerLoopCmp->getOperand(0);
931 Value *Op1 = InnerLoopCmp->getOperand(1);
942 std::function<bool(
Value *)> IsPathToInnerIndVar;
943 IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) ->
bool {
952 return IsPathToInnerIndVar(
I->getOperand(0));
954 return IsPathToInnerIndVar(
I->getOperand(0)) &&
955 IsPathToInnerIndVar(
I->getOperand(1));
961 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
969 }
else if (IsPathToInnerIndVar(Op1) && !
isa<Constant>(Op1)) {
992 if (
PHI->getNumIncomingValues() != 1)
1060 assert(
I->getOpcode() == OpCode &&
1061 "Expected the instruction to be the reduction operation");
1066 if (
I->hasNoSignedWrap() ||
I->hasNoUnsignedWrap())
1090 if (
PHI->getNumIncomingValues() == 1)
1103bool LoopInterchangeLegality::isInnerReduction(
1104 Loop *L, PHINode *Phi, SmallVectorImpl<Instruction *> &HasNoWrapInsts) {
1108 if (!
L->isInnermost()) {
1109 LLVM_DEBUG(
dbgs() <<
"Only supported when the loop is the innermost.\n");
1111 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1112 L->getStartLoc(),
L->getHeader())
1113 <<
"Only supported when the loop is the innermost.";
1118 if (
Phi->getNumIncomingValues() != 2)
1121 Value *Init =
Phi->getIncomingValueForBlock(
L->getLoopPreheader());
1122 Value *
Next =
Phi->getIncomingValueForBlock(
L->getLoopLatch());
1128 <<
"Only supported for the reduction with a constant initial value.\n");
1130 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1131 L->getStartLoc(),
L->getHeader())
1132 <<
"Only supported for the reduction with a constant initial "
1141 if (!
L->contains(BB))
1146 if (!
Phi->hasOneUser())
1158 PHINode *Lcssa = NULL;
1159 for (
auto *U :
Next->users()) {
1164 if (Lcssa == NULL &&
P->getParent() == ExitBlock &&
1165 P->getIncomingValueForBlock(
L->getLoopLatch()) ==
Next)
1176 LLVM_DEBUG(
dbgs() <<
"Only supported when the reduction is used once in "
1177 "the outer loop.\n");
1179 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1180 L->getStartLoc(),
L->getHeader())
1181 <<
"Only supported when the reduction is used once in the outer "
1187 StoreInst *LcssaStore =
1189 if (!LcssaStore || LcssaStore->
getParent() != ExitBlock)
1202 LLVM_DEBUG(
dbgs() <<
"Only supported when memory reference dominate "
1203 "the inner loop.\n");
1205 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1206 L->getStartLoc(),
L->getHeader())
1207 <<
"Only supported when memory reference dominate the inner "
1218 SR.LcssaPhi = Lcssa;
1219 SR.LcssaStore = LcssaStore;
1223 InnerReductions.push_back(SR);
1227bool LoopInterchangeLegality::findInductionAndReductions(
1229 if (!
L->getLoopLatch() || !
L->getLoopPredecessor())
1231 for (PHINode &
PHI :
L->getHeader()->phis()) {
1232 InductionDescriptor
ID;
1239 if (OuterInnerReductions.count(&
PHI)) {
1240 LLVM_DEBUG(
dbgs() <<
"Found a reduction across the outer loop.\n");
1242 isInnerReduction(L, &
PHI, HasNoWrapReductions)) {
1248 assert(
PHI.getNumIncomingValues() == 2 &&
1249 "Phis in loop header should have exactly 2 incoming values");
1254 InnerLoop, V, HasNoWrapReductions, HasNoInfInsts);
1259 <<
"Failed to recognize PHI as an induction or reduction.\n");
1262 OuterInnerReductions.insert(&
PHI);
1263 OuterInnerReductions.insert(InnerRedPhi);
1269 if (InnerReductions.size() > 1) {
1272 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1273 L->getStartLoc(),
L->getHeader())
1274 <<
"Only supports at most one reduction.";
1284bool LoopInterchangeLegality::currentLimitations() {
1294 dbgs() <<
"Loops where the latch is not the exiting block are not"
1295 <<
" supported currently.\n");
1297 return OptimizationRemarkMissed(
DEBUG_TYPE,
"ExitingNotLatch",
1300 <<
"Loops where the latch is not the exiting block cannot be"
1301 " interchange currently.";
1307 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
1309 dbgs() <<
"Only outer loops with induction or reduction PHI nodes "
1310 <<
"are supported currently.\n");
1312 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedPHIOuter",
1315 <<
"Only outer loops with induction or reduction PHI nodes can be"
1316 " interchanged currently.";
1325 Loop *CurLevelLoop = OuterLoop;
1328 CurLevelLoop = CurLevelLoop->
getSubLoops().front();
1329 if (!findInductionAndReductions(CurLevelLoop, Inductions,
nullptr)) {
1331 dbgs() <<
"Only inner loops with induction or reduction PHI nodes "
1332 <<
"are supported currently.\n");
1334 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedPHIInner",
1337 <<
"Only inner loops with induction or reduction PHI nodes can be"
1338 " interchange currently.";
1345 if (!isLoopStructureUnderstood()) {
1348 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedStructureInner",
1351 <<
"Inner loop structure not understood currently.";
1359bool LoopInterchangeLegality::findInductions(
1360 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
1361 for (PHINode &
PHI :
L->getHeader()->phis()) {
1362 InductionDescriptor
ID;
1366 return !Inductions.
empty();
1386 if (
PHI.getNumIncomingValues() > 1)
1390 if (&
PHI == LcssaReduction)
1393 PHINode *PN = dyn_cast<PHINode>(U);
1396 if (Reductions.count(PN))
1398 BasicBlock *PB = PN->getParent();
1399 if (!OuterL->contains(PB))
1401 return PB != OuterL->getLoopLatch();
1418 for (
Value *Incoming :
PHI.incoming_values()) {
1465 for (
auto *U :
PHI.users()) {
1474bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
1475 unsigned OuterLoopId,
1476 CharMatrix &DepMatrix) {
1478 LLVM_DEBUG(
dbgs() <<
"Failed interchange InnerLoopId = " << InnerLoopId
1479 <<
" and OuterLoopId = " << OuterLoopId
1480 <<
" due to dependence\n");
1482 return OptimizationRemarkMissed(
DEBUG_TYPE,
"Dependence",
1485 <<
"Cannot interchange loops due to dependences.";
1490 for (
auto *BB : OuterLoop->
blocks())
1491 for (Instruction &
I : *BB) {
1498 if (!
I.mayHaveSideEffects() && !
I.mayReadFromMemory())
1503 <<
"Loops contain instructions that cannot be safely interchanged\n");
1505 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsafeInst",
1506 I.getDebugLoc(),
I.getParent())
1507 <<
"Cannot interchange loops due to instruction that is "
1508 "potentially unsafe to interchange.";
1514 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1515 LLVM_DEBUG(
dbgs() <<
"Could not find inner loop induction variables.\n");
1520 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop latch.\n");
1522 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerLatchPHI",
1525 <<
"Cannot interchange loops because unsupported PHI nodes found "
1526 "in inner loop latch.";
1533 if (currentLimitations()) {
1534 LLVM_DEBUG(
dbgs() <<
"Not legal because of current transform limitation\n");
1539 if (!tightlyNested(OuterLoop, InnerLoop)) {
1542 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NotTightlyNested",
1545 <<
"Cannot interchange loops because they are not tightly "
1553 PHINode *LcssaReduction =
nullptr;
1554 assert(InnerReductions.size() <= 1 &&
1555 "So far we only support at most one reduction.");
1556 if (InnerReductions.size() == 1)
1557 LcssaReduction = InnerReductions[0].LcssaPhi;
1561 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop exit.\n");
1563 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedExitPHI",
1566 <<
"Found unsupported PHI node in loop exit.";
1572 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in outer loop exit.\n");
1574 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedExitPHI",
1577 <<
"Found unsupported PHI node in loop exit.";
1583 [](PHINode &
PHI) { return PHI.getNumIncomingValues() != 1; })) {
1584 LLVM_DEBUG(
dbgs() <<
"Only outer loop latch PHI nodes with one incoming "
1585 "value are supported.\n");
1587 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedLatchPHI",
1590 <<
"Only outer loop latch PHI nodes with one incoming value are "
1599void CacheCostManager::computeIfUnitinialized() {
1614 for (
const auto &[Idx,
Cost] :
enumerate((*CC)->getLoopCosts()))
1615 CostMap[
Cost.first] = Idx;
1618CacheCost *CacheCostManager::getCacheCost() {
1619 computeIfUnitinialized();
1623const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
1624 computeIfUnitinialized();
1640 return std::nullopt;
1645 return std::nullopt;
1648 std::optional<const SCEV *> Coeff =
1650 if (!Coeff.has_value())
1651 return std::nullopt;
1654 assert(!*Coeff &&
"Found more than one addrec for the same loop");
1660int LoopInterchangeProfitability::getInstrOrderCost() {
1661 SmallPtrSet<const SCEV *, 4> GoodBasePtrs, BadBasePtrs;
1662 for (BasicBlock *BB : InnerLoop->
blocks()) {
1663 for (Instruction &Ins : *BB) {
1668 std::optional<const SCEV *> OuterCoeff =
1670 std::optional<const SCEV *> InnerCoeff =
1673 if (!OuterCoeff.has_value() || !*OuterCoeff || !InnerCoeff.has_value() ||
1683 const SCEV *OuterStep = SE->
getAbsExpr(*OuterCoeff,
false);
1684 const SCEV *InnerStep = SE->
getAbsExpr(*InnerCoeff,
false);
1704 GoodBasePtrs.
insert(BasePtr);
1706 BadBasePtrs.
insert(BasePtr);
1710 int GoodOrder = GoodBasePtrs.
size();
1711 int BadOrder = BadBasePtrs.
size();
1712 return GoodOrder - BadOrder;
1716LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1717 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
1721 auto InnerLoopIt = CostMap.
find(InnerLoop);
1722 if (InnerLoopIt == CostMap.
end())
1723 return std::nullopt;
1724 auto OuterLoopIt = CostMap.
find(OuterLoop);
1725 if (OuterLoopIt == CostMap.
end())
1726 return std::nullopt;
1729 return std::nullopt;
1730 unsigned InnerIndex = InnerLoopIt->second;
1731 unsigned OuterIndex = OuterLoopIt->second;
1733 <<
", OuterIndex = " << OuterIndex <<
"\n");
1734 assert(InnerIndex != OuterIndex &&
"CostMap should assign unique "
1735 "numbers to each loop");
1736 return std::optional<bool>(InnerIndex < OuterIndex);
1740LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1744 int Cost = getInstrOrderCost();
1747 return std::optional<bool>(
true);
1749 return std::nullopt;
1754 for (
const auto &Dep : DepMatrix) {
1755 char Dir = Dep[LoopId];
1756 char DepType = Dep.back();
1757 assert((DepType ==
'<' || DepType ==
'*') &&
1758 "Unexpected element in dependency vector");
1761 if (Dir ==
'=' || Dir ==
'I')
1767 if (Dir ==
'<' && DepType ==
'<')
1776std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1777 unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix) {
1793 return std::nullopt;
1796bool LoopInterchangeProfitability::isProfitable(
1797 const Loop *InnerLoop,
const Loop *OuterLoop,
unsigned InnerLoopId,
1798 unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1807 if (InnerBTC && InnerBTC->
isZero()) {
1808 LLVM_DEBUG(
dbgs() <<
"Inner loop back-edge isn't taken, rejecting "
1809 "single iteration loop\n");
1812 if (OuterBTC && OuterBTC->
isZero()) {
1813 LLVM_DEBUG(
dbgs() <<
"Outer loop back-edge isn't taken, rejecting "
1814 "single iteration loop\n");
1822 "Duplicate rules and option 'ignore' are not allowed");
1832 std::optional<bool> shouldInterchange;
1835 case RuleTy::PerLoopCacheAnalysis: {
1836 CacheCost *CC = CCM.getCacheCost();
1837 const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
1838 shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
1841 case RuleTy::PerInstrOrderCost:
1842 shouldInterchange = isProfitablePerInstrOrderCost();
1844 case RuleTy::ForVectorization:
1846 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1848 case RuleTy::Ignore:
1855 if (shouldInterchange.has_value())
1859 if (!shouldInterchange.has_value()) {
1861 return OptimizationRemarkMissed(
DEBUG_TYPE,
"InterchangeNotProfitable",
1864 <<
"Insufficient information to calculate the cost of loop for "
1868 }
else if (!shouldInterchange.value()) {
1870 return OptimizationRemarkMissed(
DEBUG_TYPE,
"InterchangeNotProfitable",
1873 <<
"Interchanging loops is not considered to improve cache "
1874 "locality nor vectorization.";
1881void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1883 for (Loop *L : *OuterLoop)
1884 if (L == InnerLoop) {
1885 OuterLoop->removeChildLoop(L);
1914void LoopInterchangeTransform::restructureLoops(
1915 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1916 BasicBlock *OrigOuterPreHeader) {
1917 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1924 if (OuterLoopParent) {
1926 removeChildLoop(OuterLoopParent, NewInner);
1927 removeChildLoop(NewInner, NewOuter);
1930 removeChildLoop(NewInner, NewOuter);
1938 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->
blocks());
1942 for (BasicBlock *BB : NewInner->
blocks())
1950 for (BasicBlock *BB : OrigInnerBBs) {
1955 if (BB == OuterHeader || BB == OuterLatch)
1993void LoopInterchangeTransform::reduction2Memory() {
1995 LIL.getInnerReductions();
1998 "So far we only support at most one reduction.");
2000 LoopInterchangeLegality::InnerReduction SR = InnerReductions[0];
2006 PHINode *FirstIter =
2007 Builder.CreatePHI(Type::getInt1Ty(
Context), 2,
"first.iter");
2012 assert(FirstIter->
isComplete() &&
"The FirstIter PHI node is not complete.");
2017 Instruction *LoadMem = Builder.CreateLoad(SR.ElemTy, SR.MemRef);
2020 Value *NewVar = Builder.CreateSelect(FirstIter, SR.Init, LoadMem,
"new.var");
2031bool LoopInterchangeTransform::transform(
2034 bool Transformed =
false;
2037 LIL.getInnerReductions();
2038 if (InnerReductions.
size() == 1)
2042 auto &InductionPHIs = LIL.getInnerLoopInductions();
2043 if (InductionPHIs.empty()) {
2044 LLVM_DEBUG(
dbgs() <<
"Failed to find the point to split loop latch \n");
2048 SmallVector<Instruction *, 8> InnerIndexVarList;
2049 for (PHINode *CurInductionPHI : InductionPHIs) {
2051 CurInductionPHI->getIncomingValueForBlock(InnerLoop->
getLoopLatch()));
2053 "Incoming value from loop latch isn't an instruction");
2056 InnerIndexVarList.
push_back(IncomingValue);
2067 SmallSetVector<Instruction *, 4> WorkList;
2069 auto MoveInstructions = [&i, &WorkList,
this, &InductionPHIs, NewLatch]() {
2070 for (; i < WorkList.
size(); i++) {
2076 "Moving instructions with side-effects may change behavior of "
2087 for (
Value *
Op : WorkList[i]->operands()) {
2104 for (Instruction *InnerIndexVar : InnerIndexVarList)
2122 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
2124 if (InnerLoopPreHeader != OuterLoopHeader) {
2127 assert(
P.getNumIncomingValues() == 1 &&
2128 "Expected single-incoming PHIs in inner loop preheader");
2129 P.replaceAllUsesWith(
P.getIncomingValue(0));
2130 P.eraseFromParent();
2132 for (Instruction &
I :
2134 std::prev(InnerLoopPreHeader->
end()))))
2138 Transformed |= adjustLoopLinks();
2146 for (Instruction *
Reduction : DropNoWrapInsts) {
2150 for (Instruction *
I : DropNoInfInsts)
2151 I->setHasNoInfs(
false);
2172 I->removeFromParent();
2187 std::vector<DominatorTree::UpdateType> &DTUpdates,
2188 bool MustUpdateOnce =
true) {
2190 "BI must jump to OldBB exactly once.");
2192 for (
Use &
Op : Term->operands())
2199 DTUpdates.push_back(
2200 {DominatorTree::UpdateKind::Insert, Term->getParent(), NewBB});
2201 DTUpdates.push_back(
2202 {DominatorTree::UpdateKind::Delete, Term->getParent(), OldBB});
2221 assert(
P.getNumIncomingValues() == 1 &&
2222 "Only loops with a single exit are supported!");
2224 Value *IncomingValue =
P.getIncomingValueForBlock(InnerLatch);
2231 "Expected non-instruction incoming value to be loop invariant");
2232 P.replaceAllUsesWith(IncomingValue);
2233 P.eraseFromParent();
2242 if (IncIInnerMost->getParent() != InnerLatch &&
2243 IncIInnerMost->
getParent() != InnerHeader)
2247 [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
2248 if (!isa<PHINode>(U))
2250 return (cast<PHINode>(U)->getParent() == OuterHeader &&
2251 IncI->getParent() == InnerHeader) ||
2252 cast<PHINode>(U)->getParent() == OuterExit;
2254 "Can only replace phis iff the phi-uses are in the loop nest exit "
2255 "or the incoming value is defined in the inner header (it will "
2256 "dominate all loop blocks after interchanging)");
2257 P.replaceAllUsesWith(IncI);
2258 P.eraseFromParent();
2286 if (
P.getNumIncomingValues() != 1)
2300 if (Pred == OuterLatch)
2305 P.setIncomingValue(0, NewPhi);
2345 if (OuterLoopLatch == InnerLoopExit)
2352 assert(Phi->getNumIncomingValues() == 1 &&
"Single input phi expected");
2353 LLVM_DEBUG(
dbgs() <<
"Removing 1-input phi in non-exit block: " << *Phi
2355 Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
2356 Phi->eraseFromParent();
2360bool LoopInterchangeTransform::adjustLoopBranches() {
2362 std::vector<DominatorTree::UpdateType> DTUpdates;
2364 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2367 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
2368 InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
2369 InnerLoopPreHeader &&
"Guaranteed by loop-simplify form");
2379 OuterLoopPreHeader =
2381 if (InnerLoopPreHeader == OuterLoop->getHeader())
2382 InnerLoopPreHeader =
2387 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
2389 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
2396 CondBrInst *OuterLoopLatchBI =
2398 CondBrInst *InnerLoopLatchBI =
2403 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
2404 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
2412 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
2415 if (!InnerLoopHeaderSuccessor)
2423 InnerLoopPreHeader, DTUpdates,
false);
2433 InnerLoopHeaderSuccessor, DTUpdates,
2441 OuterLoopPreHeader, DTUpdates);
2444 if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
2445 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
2447 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
2450 InnerLoopLatchSuccessor, DTUpdates);
2452 if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
2453 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
2455 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
2458 OuterLoopLatchSuccessor, DTUpdates);
2459 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
2463 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
2464 OuterLoopPreHeader);
2466 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
2467 OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
2473 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
2476 for (PHINode &
PHI : InnerLoopHeader->
phis())
2477 if (OuterInnerReductions.contains(&
PHI))
2480 for (PHINode &
PHI : OuterLoopHeader->
phis())
2481 if (OuterInnerReductions.contains(&
PHI))
2487 for (PHINode *
PHI : OuterLoopPHIs) {
2490 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
2492 for (PHINode *
PHI : InnerLoopPHIs) {
2495 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
2508 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
2509 for (Instruction &
I :
2517bool LoopInterchangeTransform::adjustLoopLinks() {
2519 bool Changed = adjustLoopBranches();
2524 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2545 LLVM_DEBUG(
dbgs() <<
"Not valid loop candidate for interchange\n");
2553 <<
"Computed dependence info, invoking the transform.";
2557 if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT, &AR, &ORE).run(LN))
2559 U.markLoopNestChanged(
true);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
ReachingDefInfo InstSet InstSet & Ignore
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the interface for the loop cache analysis.
SmallVector< Loop *, 4 > LoopVector
Loop::LoopBounds::Direction Direction
static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static void updateSuccessor(Instruction *Term, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static cl::opt< bool > EnableReduction2Memory("loop-interchange-reduction-to-mem", cl::init(false), cl::Hidden, cl::desc("Support for the inner-loop reduction pattern."))
static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)
std::optional< const SCEV * > getAddRecCoefficient(ScalarEvolution &SE, const SCEV *S, const Loop *L)
If \S contains an affine addrec for L, return the step recurrence of it.
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop)
This deals with a corner case when a LCSSA phi node appears in a non-exit block: the outer loop latch...
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static void printDepMatrix(CharMatrix &DepMatrix)
static cl::opt< unsigned int > MaxMemInstrRatio("loop-interchange-max-mem-instr-ratio", cl::init(4), cl::Hidden, cl::desc("Maximum number of load/store instructions squared in relation to " "the total number of instructions. Higher value may lead to more " "interchanges at the cost of compile-time"))
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts, SmallVectorImpl< Instruction * > &HasNoInfInsts)
static bool areInnerLoopExitPHIsSupported(Loop *OuterL, Loop *InnerL, SmallPtrSetImpl< PHINode * > &Reductions, PHINode *LcssaReduction)
We currently only support LCSSA PHI nodes in the inner loop exit if their users are either of the fol...
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)
static bool inThisOrder(const Instruction *Src, const Instruction *Dst)
Return true if Src appears before Dst in the same basic block.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)
Return true if we can vectorize the loop specified by LoopId.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)
static bool checkReductionKind(Loop *L, PHINode *PHI, SmallVectorImpl< Instruction * > &HasNoWrapInsts, SmallVectorImpl< Instruction * > &HasNoInfInsts)
static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
loop Loop Strength Reduction
uint64_t IntrinsicInst * II
SmallVector< Value *, 8 > ValueVector
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
static LLVM_ABI std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
iterator find(const_arg_type_t< KeyT > Val)
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, ArrayRef< const SCEVPredicate * > NoWrapPreds={}, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void changeLoopFor(const BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
StringRef getName() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
bool isComplete() const
If the PHI node is complete which means all of its parent's predecessors have incoming value in this ...
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
unsigned getOpcode() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
The main scalar evolution driver.
LLVM_ABI const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Represent a constant reference to a string, i.e.
constexpr size_t size() const
Get the string size.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ BasicBlock
Various leaf nodes.
list_initializer< Ty > list_init(ArrayRef< Ty > Vals)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...