55#define DEBUG_TYPE "loop-interchange"
57STATISTIC(LoopsInterchanged,
"Number of loops interchanged");
61 cl::desc(
"Interchange if you gain more than this number"));
65 cl::desc(
"Maximum number of load/store instructions squared in relation to "
66 "the total number of instructions. Higher value may lead to more "
67 "interchanges at the cost of compile-time"));
81using CharMatrix = std::vector<std::vector<char>>;
96 cl::desc(
"Minimum depth of loop nest considered for the transform"));
101 cl::desc(
"Maximum depth of loop nest considered for the transform"));
107 cl::desc(
"List of profitability heuristics to be used. They are applied in "
110 RuleTy::ForVectorization}),
112 "Prioritize loop cache cost"),
113 clEnumValN(RuleTy::PerInstrOrderCost,
"instorder",
114 "Prioritize the IVs order of each instruction"),
115 clEnumValN(RuleTy::ForVectorization,
"vectorize",
116 "Prioritize vectorization"),
118 "Ignore profitability, force interchange (does not "
119 "work with other options)")));
124 cl::desc(
"Support for the inner-loop reduction pattern."));
129 for (RuleTy Rule : Rules) {
130 if (!Set.insert(Rule).second)
132 if (Rule == RuleTy::Ignore)
139 for (
auto &Row : DepMatrix) {
152 assert(Src->getParent() == Dst->getParent() && Src != Dst &&
153 "Expected Src and Dst to be different instructions in the same BB");
155 bool FoundSrc =
false;
176 unsigned NumInsts = 0;
206 unsigned NumMemInstr = MemInstr.
size();
208 <<
" Loads and Stores to analyze\n");
212 L->getStartLoc(), L->getHeader())
213 <<
"Number of loads/stores exceeded, the supported maximum can be "
214 "increased with option -loop-interchange-max-mem-instr-ratio.";
224 for (
I = MemInstr.
begin(), IE = MemInstr.
end();
I != IE; ++
I) {
225 for (J =
I, JE = MemInstr.
end(); J != JE; ++J) {
226 std::vector<char> Dep;
233 if (
auto D = DI->
depends(Src, Dst)) {
234 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
237 if (
D->normalize(SE))
240 D->isFlow() ?
"flow" :
D->isAnti() ?
"anti" :
"output";
241 dbgs() <<
"Found " << DepType
242 <<
" dependency between Src and Dst\n"
243 <<
" Src:" << *Src <<
"\n Dst:" << *Dst <<
'\n');
244 unsigned Levels =
D->getLevels();
246 for (
unsigned II = 1;
II <= Levels; ++
II) {
253 unsigned Dir =
D->getDirection(
II);
267 if (
D->isConfused()) {
268 assert(Dep.empty() &&
"Expected empty dependency vector");
269 Dep.assign(Level,
'*');
272 while (Dep.size() != Level) {
281 L->getStartLoc(), L->getHeader())
282 <<
"All loops have dependencies in all directions.";
288 bool IsKnownForward =
true;
289 if (Src->getParent() != Dst->getParent()) {
293 IsKnownForward =
false;
299 "Unexpected instructions");
304 bool IsReversed =
D->getSrc() != Src;
306 IsKnownForward =
false;
322 DepMatrix.push_back(Dep);
329 DepMatrix[Ite->second].back() =
'*';
341 for (
auto &Row : DepMatrix)
350static std::optional<bool>
363 unsigned InnerLoopId,
364 unsigned OuterLoopId) {
365 unsigned NumRows = DepMatrix.size();
366 std::vector<char> Cur;
368 for (
unsigned Row = 0; Row < NumRows; ++Row) {
371 Cur = DepMatrix[Row];
384 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
393 << L.getHeader()->getParent()->getName() <<
" Loop: %"
394 << L.getHeader()->getName() <<
'\n');
395 assert(LoopList.
empty() &&
"LoopList should initially be empty!");
396 Loop *CurrentLoop = &L;
397 const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
398 while (!Vec->empty()) {
402 if (Vec->size() != 1) {
408 CurrentLoop = Vec->front();
416 unsigned LoopNestDepth = LoopList.
size();
418 LLVM_DEBUG(
dbgs() <<
"Unsupported depth of loop nest " << LoopNestDepth
426 <<
"Unsupported depth of loop nest, the supported range is ["
437 for (
Loop *L : LoopList) {
443 if (L->getNumBackEdges() != 1) {
447 if (!L->getExitingBlock()) {
458class LoopInterchangeLegality {
460 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
461 OptimizationRemarkEmitter *ORE, DominatorTree *DT)
462 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), DT(DT), ORE(ORE) {}
465 bool canInterchangeLoops(
unsigned InnerLoopId,
unsigned OuterLoopId,
466 CharMatrix &DepMatrix);
470 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
474 bool isLoopStructureUnderstood();
476 bool currentLimitations();
478 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions()
const {
479 return OuterInnerReductions;
483 return InnerLoopInductions;
487 return HasNoWrapReductions;
496 struct InnerReduction {
504 StoreInst *LcssaStore;
511 return InnerReductions;
515 bool tightlyNested(Loop *Outer, Loop *Inner);
516 bool containsUnsafeInstructions(BasicBlock *BB, Instruction *Skip);
522 bool findInductionAndReductions(Loop *L,
536 bool isInnerReduction(Loop *L, PHINode *Phi,
537 SmallVectorImpl<Instruction *> &HasNoWrapInsts);
546 OptimizationRemarkEmitter *ORE;
550 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
558 SmallVector<Instruction *, 4> HasNoWrapReductions;
562 SmallVector<Instruction *, 4> HasNoInfInsts;
571class CacheCostManager {
573 LoopStandardAnalysisResults *AR;
578 std::optional<std::unique_ptr<CacheCost>> CC;
582 DenseMap<const Loop *, unsigned> CostMap;
584 void computeIfUnitinialized();
587 CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
589 : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
590 CacheCost *getCacheCost();
591 const DenseMap<const Loop *, unsigned> &getCostMap();
596class LoopInterchangeProfitability {
598 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
599 OptimizationRemarkEmitter *ORE)
600 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
603 bool isProfitable(
const Loop *InnerLoop,
const Loop *OuterLoop,
604 unsigned InnerLoopId,
unsigned OuterLoopId,
605 CharMatrix &DepMatrix, CacheCostManager &CCM);
608 int getInstrOrderCost();
609 std::optional<bool> isProfitablePerLoopCacheAnalysis(
610 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
611 std::optional<bool> isProfitablePerInstrOrderCost();
612 std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
613 unsigned OuterLoopId,
614 CharMatrix &DepMatrix);
622 OptimizationRemarkEmitter *ORE;
626class LoopInterchangeTransform {
628 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
629 LoopInfo *LI, DominatorTree *DT,
630 const LoopInterchangeLegality &LIL)
631 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
636 void reduction2Memory();
637 void restructureLoops(Loop *NewInner, Loop *NewOuter,
638 BasicBlock *OrigInnerPreHeader,
639 BasicBlock *OrigOuterPreHeader);
640 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
643 bool adjustLoopLinks();
644 bool adjustLoopBranches();
655 const LoopInterchangeLegality &LIL;
658struct LoopInterchange {
659 ScalarEvolution *SE =
nullptr;
660 LoopInfo *LI =
nullptr;
661 DependenceInfo *DI =
nullptr;
662 DominatorTree *DT =
nullptr;
663 LoopStandardAnalysisResults *AR =
nullptr;
666 OptimizationRemarkEmitter *ORE;
668 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
669 DominatorTree *DT, LoopStandardAnalysisResults *AR,
670 OptimizationRemarkEmitter *ORE)
671 : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
674 if (
L->getParentLoop())
676 SmallVector<Loop *, 8> LoopList;
678 return processLoopList(LoopList);
681 bool run(LoopNest &LN) {
682 SmallVector<Loop *, 8> LoopList(LN.
getLoops());
683 for (
unsigned I = 1;
I < LoopList.size(); ++
I)
684 if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
686 return processLoopList(LoopList);
692 return LoopList.
size() - 1;
695 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
700 "Unsupported depth of loop nest.");
702 unsigned LoopNestDepth = LoopList.
size();
705 dbgs() <<
"Processing LoopList of size = " << LoopNestDepth
706 <<
" containing the following loops:\n";
707 for (
auto *L : LoopList) {
713 CharMatrix DependencyMatrix;
714 Loop *OuterMostLoop = *(LoopList.begin());
716 OuterMostLoop, DI, SE, ORE)) {
728 <<
"' needs an unique exit block");
732 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
733 CacheCostManager CCM(LoopList[0], AR, DI);
738 for (
unsigned j = SelecLoopId;
j > 0;
j--) {
739 bool ChangedPerIter =
false;
740 for (
unsigned i = SelecLoopId; i > SelecLoopId -
j; i--) {
742 processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
743 ChangedPerIter |= Interchanged;
754 bool processLoop(SmallVectorImpl<Loop *> &LoopList,
unsigned InnerLoopId,
755 unsigned OuterLoopId,
756 std::vector<std::vector<char>> &DependencyMatrix,
757 CacheCostManager &CCM) {
758 Loop *OuterLoop = LoopList[OuterLoopId];
759 Loop *InnerLoop = LoopList[InnerLoopId];
761 <<
" and OuterLoopId = " << OuterLoopId <<
"\n");
762 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE, DT);
763 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
764 LLVM_DEBUG(
dbgs() <<
"Cannot prove legality, not interchanging loops '"
765 << OuterLoop->
getName() <<
"' and '"
766 << InnerLoop->
getName() <<
"'\n");
771 <<
"' are legal to interchange\n");
772 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
773 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
774 DependencyMatrix, CCM)) {
776 <<
"' and '" << InnerLoop->
getName()
777 <<
"' not profitable.\n");
782 return OptimizationRemark(
DEBUG_TYPE,
"Interchanged",
785 <<
"Loop interchanged with enclosing loop.";
788 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
789 LIT.transform(LIL.getHasNoWrapReductions(), LIL.getHasNoInfInsts());
791 << OuterLoop->
getName() <<
"' and inner loop '"
792 << InnerLoop->
getName() <<
"'\n");
798 std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
811bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB,
813 return any_of(*BB, [Skip](
const Instruction &
I) {
816 return I.mayHaveSideEffects() ||
I.mayReadFromMemory();
820bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
826 <<
"' and '" << InnerLoop->
getName()
827 <<
"' are tightly nested\n");
832 for (BasicBlock *Succ :
successors(OuterLoopHeader))
833 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader() &&
834 Succ != OuterLoopLatch)
837 LLVM_DEBUG(
dbgs() <<
"Checking instructions in Loop header and Loop latch\n");
843 assert(InnerReductions.size() <= 1 &&
844 "So far we only support at most one reduction.");
845 if (InnerReductions.size() == 1)
846 Skip = InnerReductions[0].LcssaStore;
850 if (containsUnsafeInstructions(OuterLoopHeader, Skip) ||
851 containsUnsafeInstructions(OuterLoopLatch, Skip))
857 if (InnerLoopPreHeader != OuterLoopHeader &&
858 containsUnsafeInstructions(InnerLoopPreHeader, Skip))
866 if (&SuccInner != OuterLoopLatch) {
868 <<
" does not lead to the outer loop latch.\n";);
874 if (containsUnsafeInstructions(InnerLoopExit, Skip))
882bool LoopInterchangeLegality::isLoopStructureUnderstood() {
884 for (PHINode *InnerInduction : InnerLoopInductions) {
885 unsigned Num = InnerInduction->getNumOperands();
886 for (
unsigned i = 0; i < Num; ++i) {
887 Value *Val = InnerInduction->getOperand(i);
897 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
898 InnerLoopPreheader &&
912 CondBrInst *InnerLoopLatchBI =
914 if (!InnerLoopLatchBI)
916 if (CmpInst *InnerLoopCmp =
918 Value *Op0 = InnerLoopCmp->getOperand(0);
919 Value *Op1 = InnerLoopCmp->getOperand(1);
930 std::function<bool(
Value *)> IsPathToInnerIndVar;
931 IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) ->
bool {
940 return IsPathToInnerIndVar(
I->getOperand(0));
942 return IsPathToInnerIndVar(
I->getOperand(0)) &&
943 IsPathToInnerIndVar(
I->getOperand(1));
949 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
957 }
else if (IsPathToInnerIndVar(Op1) && !
isa<Constant>(Op1)) {
980 if (
PHI->getNumIncomingValues() != 1)
1048 assert(
I->getOpcode() == OpCode &&
1049 "Expected the instruction to be the reduction operation");
1054 if (
I->hasNoSignedWrap() ||
I->hasNoUnsignedWrap())
1078 if (
PHI->getNumIncomingValues() == 1)
1091bool LoopInterchangeLegality::isInnerReduction(
1092 Loop *L, PHINode *Phi, SmallVectorImpl<Instruction *> &HasNoWrapInsts) {
1096 if (!
L->isInnermost()) {
1097 LLVM_DEBUG(
dbgs() <<
"Only supported when the loop is the innermost.\n");
1099 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1100 L->getStartLoc(),
L->getHeader())
1101 <<
"Only supported when the loop is the innermost.";
1106 if (
Phi->getNumIncomingValues() != 2)
1109 Value *Init =
Phi->getIncomingValueForBlock(
L->getLoopPreheader());
1110 Value *
Next =
Phi->getIncomingValueForBlock(
L->getLoopLatch());
1116 <<
"Only supported for the reduction with a constant initial value.\n");
1118 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1119 L->getStartLoc(),
L->getHeader())
1120 <<
"Only supported for the reduction with a constant initial "
1129 if (!
L->contains(BB))
1134 if (!
Phi->hasOneUser())
1146 PHINode *Lcssa = NULL;
1147 for (
auto *U :
Next->users()) {
1152 if (Lcssa == NULL &&
P->getParent() == ExitBlock &&
1153 P->getIncomingValueForBlock(
L->getLoopLatch()) ==
Next)
1164 LLVM_DEBUG(
dbgs() <<
"Only supported when the reduction is used once in "
1165 "the outer loop.\n");
1167 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1168 L->getStartLoc(),
L->getHeader())
1169 <<
"Only supported when the reduction is used once in the outer "
1175 StoreInst *LcssaStore =
1177 if (!LcssaStore || LcssaStore->
getParent() != ExitBlock)
1190 LLVM_DEBUG(
dbgs() <<
"Only supported when memory reference dominate "
1191 "the inner loop.\n");
1193 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1194 L->getStartLoc(),
L->getHeader())
1195 <<
"Only supported when memory reference dominate the inner "
1206 SR.LcssaPhi = Lcssa;
1207 SR.LcssaStore = LcssaStore;
1211 InnerReductions.push_back(SR);
1215bool LoopInterchangeLegality::findInductionAndReductions(
1217 if (!
L->getLoopLatch() || !
L->getLoopPredecessor())
1219 for (PHINode &
PHI :
L->getHeader()->phis()) {
1220 InductionDescriptor
ID;
1227 if (OuterInnerReductions.count(&
PHI)) {
1228 LLVM_DEBUG(
dbgs() <<
"Found a reduction across the outer loop.\n");
1230 isInnerReduction(L, &
PHI, HasNoWrapReductions)) {
1236 assert(
PHI.getNumIncomingValues() == 2 &&
1237 "Phis in loop header should have exactly 2 incoming values");
1242 InnerLoop, V, HasNoWrapReductions, HasNoInfInsts);
1247 <<
"Failed to recognize PHI as an induction or reduction.\n");
1250 OuterInnerReductions.insert(&
PHI);
1251 OuterInnerReductions.insert(InnerRedPhi);
1257 if (InnerReductions.size() > 1) {
1260 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerReduction",
1261 L->getStartLoc(),
L->getHeader())
1262 <<
"Only supports at most one reduction.";
1272bool LoopInterchangeLegality::currentLimitations() {
1282 dbgs() <<
"Loops where the latch is not the exiting block are not"
1283 <<
" supported currently.\n");
1285 return OptimizationRemarkMissed(
DEBUG_TYPE,
"ExitingNotLatch",
1288 <<
"Loops where the latch is not the exiting block cannot be"
1289 " interchange currently.";
1295 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
1297 dbgs() <<
"Only outer loops with induction or reduction PHI nodes "
1298 <<
"are supported currently.\n");
1300 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedPHIOuter",
1303 <<
"Only outer loops with induction or reduction PHI nodes can be"
1304 " interchanged currently.";
1313 Loop *CurLevelLoop = OuterLoop;
1316 CurLevelLoop = CurLevelLoop->
getSubLoops().front();
1317 if (!findInductionAndReductions(CurLevelLoop, Inductions,
nullptr)) {
1319 dbgs() <<
"Only inner loops with induction or reduction PHI nodes "
1320 <<
"are supported currently.\n");
1322 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedPHIInner",
1325 <<
"Only inner loops with induction or reduction PHI nodes can be"
1326 " interchange currently.";
1333 if (!isLoopStructureUnderstood()) {
1336 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedStructureInner",
1339 <<
"Inner loop structure not understood currently.";
1347bool LoopInterchangeLegality::findInductions(
1348 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
1349 for (PHINode &
PHI :
L->getHeader()->phis()) {
1350 InductionDescriptor
ID;
1354 return !Inductions.
empty();
1374 if (
PHI.getNumIncomingValues() > 1)
1376 if (&
PHI == LcssaReduction)
1379 PHINode *PN = dyn_cast<PHINode>(U);
1382 if (Reductions.count(PN))
1384 BasicBlock *PB = PN->getParent();
1385 if (!OuterL->contains(PB))
1387 return PB != OuterL->getLoopLatch();
1404 for (
Value *Incoming :
PHI.incoming_values()) {
1451 for (
auto *U :
PHI.users()) {
1460bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
1461 unsigned OuterLoopId,
1462 CharMatrix &DepMatrix) {
1464 LLVM_DEBUG(
dbgs() <<
"Failed interchange InnerLoopId = " << InnerLoopId
1465 <<
" and OuterLoopId = " << OuterLoopId
1466 <<
" due to dependence\n");
1468 return OptimizationRemarkMissed(
DEBUG_TYPE,
"Dependence",
1471 <<
"Cannot interchange loops due to dependences.";
1476 for (
auto *BB : OuterLoop->
blocks())
1477 for (Instruction &
I : *BB)
1483 dbgs() <<
"Loops with call instructions cannot be interchanged "
1486 return OptimizationRemarkMissed(
DEBUG_TYPE,
"CallInst",
1489 <<
"Cannot interchange loops due to call instruction.";
1495 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1496 LLVM_DEBUG(
dbgs() <<
"Could not find inner loop induction variables.\n");
1501 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop latch.\n");
1503 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerLatchPHI",
1506 <<
"Cannot interchange loops because unsupported PHI nodes found "
1507 "in inner loop latch.";
1514 if (currentLimitations()) {
1515 LLVM_DEBUG(
dbgs() <<
"Not legal because of current transform limitation\n");
1520 if (!tightlyNested(OuterLoop, InnerLoop)) {
1523 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NotTightlyNested",
1526 <<
"Cannot interchange loops because they are not tightly "
1534 PHINode *LcssaReduction =
nullptr;
1535 assert(InnerReductions.size() <= 1 &&
1536 "So far we only support at most one reduction.");
1537 if (InnerReductions.size() == 1)
1538 LcssaReduction = InnerReductions[0].LcssaPhi;
1542 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop exit.\n");
1544 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedExitPHI",
1547 <<
"Found unsupported PHI node in loop exit.";
1553 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in outer loop exit.\n");
1555 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedExitPHI",
1558 <<
"Found unsupported PHI node in loop exit.";
1564 [](PHINode &
PHI) { return PHI.getNumIncomingValues() != 1; })) {
1565 LLVM_DEBUG(
dbgs() <<
"Only outer loop latch PHI nodes with one incoming "
1566 "value are supported.\n");
1568 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedLatchPHI",
1571 <<
"Only outer loop latch PHI nodes with one incoming value are "
1580void CacheCostManager::computeIfUnitinialized() {
1595 for (
const auto &[Idx,
Cost] :
enumerate((*CC)->getLoopCosts()))
1596 CostMap[
Cost.first] = Idx;
1599CacheCost *CacheCostManager::getCacheCost() {
1600 computeIfUnitinialized();
1604const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
1605 computeIfUnitinialized();
1621 return std::nullopt;
1626 return std::nullopt;
1629 std::optional<const SCEV *> Coeff =
1631 if (!Coeff.has_value())
1632 return std::nullopt;
1635 assert(!*Coeff &&
"Found more than one addrec for the same loop");
1641int LoopInterchangeProfitability::getInstrOrderCost() {
1642 SmallPtrSet<const SCEV *, 4> GoodBasePtrs, BadBasePtrs;
1643 for (BasicBlock *BB : InnerLoop->
blocks()) {
1644 for (Instruction &Ins : *BB) {
1649 std::optional<const SCEV *> OuterCoeff =
1651 std::optional<const SCEV *> InnerCoeff =
1654 if (!OuterCoeff.has_value() || !*OuterCoeff || !InnerCoeff.has_value() ||
1664 const SCEV *OuterStep = SE->
getAbsExpr(*OuterCoeff,
false);
1665 const SCEV *InnerStep = SE->
getAbsExpr(*InnerCoeff,
false);
1685 GoodBasePtrs.
insert(BasePtr);
1687 BadBasePtrs.
insert(BasePtr);
1691 int GoodOrder = GoodBasePtrs.
size();
1692 int BadOrder = BadBasePtrs.
size();
1693 return GoodOrder - BadOrder;
1697LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1698 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
1702 auto InnerLoopIt = CostMap.
find(InnerLoop);
1703 if (InnerLoopIt == CostMap.
end())
1704 return std::nullopt;
1705 auto OuterLoopIt = CostMap.
find(OuterLoop);
1706 if (OuterLoopIt == CostMap.
end())
1707 return std::nullopt;
1710 return std::nullopt;
1711 unsigned InnerIndex = InnerLoopIt->second;
1712 unsigned OuterIndex = OuterLoopIt->second;
1714 <<
", OuterIndex = " << OuterIndex <<
"\n");
1715 assert(InnerIndex != OuterIndex &&
"CostMap should assign unique "
1716 "numbers to each loop");
1717 return std::optional<bool>(InnerIndex < OuterIndex);
1721LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1725 int Cost = getInstrOrderCost();
1728 return std::optional<bool>(
true);
1730 return std::nullopt;
1735 for (
const auto &Dep : DepMatrix) {
1736 char Dir = Dep[LoopId];
1737 char DepType = Dep.back();
1738 assert((DepType ==
'<' || DepType ==
'*') &&
1739 "Unexpected element in dependency vector");
1742 if (Dir ==
'=' || Dir ==
'I')
1748 if (Dir ==
'<' && DepType ==
'<')
1757std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1758 unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix) {
1774 return std::nullopt;
1777bool LoopInterchangeProfitability::isProfitable(
1778 const Loop *InnerLoop,
const Loop *OuterLoop,
unsigned InnerLoopId,
1779 unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1788 if (InnerBTC && InnerBTC->
isZero()) {
1789 LLVM_DEBUG(
dbgs() <<
"Inner loop back-edge isn't taken, rejecting "
1790 "single iteration loop\n");
1793 if (OuterBTC && OuterBTC->
isZero()) {
1794 LLVM_DEBUG(
dbgs() <<
"Outer loop back-edge isn't taken, rejecting "
1795 "single iteration loop\n");
1803 "Duplicate rules and option 'ignore' are not allowed");
1813 std::optional<bool> shouldInterchange;
1816 case RuleTy::PerLoopCacheAnalysis: {
1817 CacheCost *CC = CCM.getCacheCost();
1818 const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
1819 shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
1822 case RuleTy::PerInstrOrderCost:
1823 shouldInterchange = isProfitablePerInstrOrderCost();
1825 case RuleTy::ForVectorization:
1827 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1829 case RuleTy::Ignore:
1836 if (shouldInterchange.has_value())
1840 if (!shouldInterchange.has_value()) {
1842 return OptimizationRemarkMissed(
DEBUG_TYPE,
"InterchangeNotProfitable",
1845 <<
"Insufficient information to calculate the cost of loop for "
1849 }
else if (!shouldInterchange.value()) {
1851 return OptimizationRemarkMissed(
DEBUG_TYPE,
"InterchangeNotProfitable",
1854 <<
"Interchanging loops is not considered to improve cache "
1855 "locality nor vectorization.";
1862void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1864 for (Loop *L : *OuterLoop)
1865 if (L == InnerLoop) {
1866 OuterLoop->removeChildLoop(L);
1895void LoopInterchangeTransform::restructureLoops(
1896 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1897 BasicBlock *OrigOuterPreHeader) {
1898 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1905 if (OuterLoopParent) {
1907 removeChildLoop(OuterLoopParent, NewInner);
1908 removeChildLoop(NewInner, NewOuter);
1911 removeChildLoop(NewInner, NewOuter);
1919 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->
blocks());
1923 for (BasicBlock *BB : NewInner->
blocks())
1931 for (BasicBlock *BB : OrigInnerBBs) {
1936 if (BB == OuterHeader || BB == OuterLatch)
1974void LoopInterchangeTransform::reduction2Memory() {
1976 LIL.getInnerReductions();
1979 "So far we only support at most one reduction.");
1981 LoopInterchangeLegality::InnerReduction SR = InnerReductions[0];
1987 PHINode *FirstIter =
1988 Builder.CreatePHI(Type::getInt1Ty(
Context), 2,
"first.iter");
1993 assert(FirstIter->
isComplete() &&
"The FirstIter PHI node is not complete.");
1998 Instruction *LoadMem = Builder.CreateLoad(SR.ElemTy, SR.MemRef);
2001 Value *NewVar = Builder.CreateSelect(FirstIter, SR.Init, LoadMem,
"new.var");
2012bool LoopInterchangeTransform::transform(
2015 bool Transformed =
false;
2018 LIL.getInnerReductions();
2019 if (InnerReductions.
size() == 1)
2023 auto &InductionPHIs = LIL.getInnerLoopInductions();
2024 if (InductionPHIs.empty()) {
2025 LLVM_DEBUG(
dbgs() <<
"Failed to find the point to split loop latch \n");
2029 SmallVector<Instruction *, 8> InnerIndexVarList;
2030 for (PHINode *CurInductionPHI : InductionPHIs) {
2032 CurInductionPHI->getIncomingValueForBlock(InnerLoop->
getLoopLatch()));
2034 "Incoming value from loop latch isn't an instruction");
2037 InnerIndexVarList.
push_back(IncomingValue);
2048 SmallSetVector<Instruction *, 4> WorkList;
2050 auto MoveInstructions = [&i, &WorkList,
this, &InductionPHIs, NewLatch]() {
2051 for (; i < WorkList.
size(); i++) {
2057 "Moving instructions with side-effects may change behavior of "
2068 for (
Value *
Op : WorkList[i]->operands()) {
2085 for (Instruction *InnerIndexVar : InnerIndexVarList)
2103 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
2105 if (InnerLoopPreHeader != OuterLoopHeader) {
2108 assert(
P.getNumIncomingValues() == 1 &&
2109 "Expected single-incoming PHIs in inner loop preheader");
2110 P.replaceAllUsesWith(
P.getIncomingValue(0));
2111 P.eraseFromParent();
2113 for (Instruction &
I :
2115 std::prev(InnerLoopPreHeader->
end()))))
2119 Transformed |= adjustLoopLinks();
2127 for (Instruction *
Reduction : DropNoWrapInsts) {
2131 for (Instruction *
I : DropNoInfInsts)
2132 I->setHasNoInfs(
false);
2153 I->removeFromParent();
2168 std::vector<DominatorTree::UpdateType> &DTUpdates,
2169 bool MustUpdateOnce =
true) {
2171 "BI must jump to OldBB exactly once.");
2173 for (
Use &
Op : Term->operands())
2180 DTUpdates.push_back(
2181 {DominatorTree::UpdateKind::Insert, Term->getParent(), NewBB});
2182 DTUpdates.push_back(
2183 {DominatorTree::UpdateKind::Delete, Term->getParent(), OldBB});
2202 assert(
P.getNumIncomingValues() == 1 &&
2203 "Only loops with a single exit are supported!");
2212 if (IncIInnerMost->getParent() != InnerLatch &&
2213 IncIInnerMost->
getParent() != InnerHeader)
2217 [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
2218 return (cast<PHINode>(U)->getParent() == OuterHeader &&
2219 IncI->getParent() == InnerHeader) ||
2220 cast<PHINode>(U)->getParent() == OuterExit;
2222 "Can only replace phis iff the uses are in the loop nest exit or "
2223 "the incoming value is defined in the inner header (it will "
2224 "dominate all loop blocks after interchanging)");
2225 P.replaceAllUsesWith(IncI);
2226 P.eraseFromParent();
2254 if (
P.getNumIncomingValues() != 1)
2268 if (Pred == OuterLatch)
2273 P.setIncomingValue(0, NewPhi);
2313 if (OuterLoopLatch == InnerLoopExit)
2320 assert(Phi->getNumIncomingValues() == 1 &&
"Single input phi expected");
2321 LLVM_DEBUG(
dbgs() <<
"Removing 1-input phi in non-exit block: " << *Phi
2323 Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
2324 Phi->eraseFromParent();
2328bool LoopInterchangeTransform::adjustLoopBranches() {
2330 std::vector<DominatorTree::UpdateType> DTUpdates;
2332 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2335 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
2336 InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
2337 InnerLoopPreHeader &&
"Guaranteed by loop-simplify form");
2347 OuterLoopPreHeader =
2349 if (InnerLoopPreHeader == OuterLoop->getHeader())
2350 InnerLoopPreHeader =
2355 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
2357 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
2364 CondBrInst *OuterLoopLatchBI =
2366 CondBrInst *InnerLoopLatchBI =
2371 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
2372 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
2380 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
2383 if (!InnerLoopHeaderSuccessor)
2391 InnerLoopPreHeader, DTUpdates,
false);
2401 InnerLoopHeaderSuccessor, DTUpdates,
2409 OuterLoopPreHeader, DTUpdates);
2412 if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
2413 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
2415 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
2418 InnerLoopLatchSuccessor, DTUpdates);
2420 if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
2421 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
2423 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
2426 OuterLoopLatchSuccessor, DTUpdates);
2427 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
2431 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
2432 OuterLoopPreHeader);
2434 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
2435 OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
2441 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
2444 for (PHINode &
PHI : InnerLoopHeader->
phis())
2445 if (OuterInnerReductions.contains(&
PHI))
2448 for (PHINode &
PHI : OuterLoopHeader->
phis())
2449 if (OuterInnerReductions.contains(&
PHI))
2455 for (PHINode *
PHI : OuterLoopPHIs) {
2458 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
2460 for (PHINode *
PHI : InnerLoopPHIs) {
2463 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
2476 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
2477 for (Instruction &
I :
2485bool LoopInterchangeTransform::adjustLoopLinks() {
2487 bool Changed = adjustLoopBranches();
2492 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2513 LLVM_DEBUG(
dbgs() <<
"Not valid loop candidate for interchange\n");
2521 <<
"Computed dependence info, invoking the transform.";
2525 if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT, &AR, &ORE).run(LN))
2527 U.markLoopNestChanged(
true);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
ReachingDefInfo InstSet InstSet & Ignore
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the interface for the loop cache analysis.
SmallVector< Loop *, 4 > LoopVector
Loop::LoopBounds::Direction Direction
static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static void updateSuccessor(Instruction *Term, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static cl::opt< bool > EnableReduction2Memory("loop-interchange-reduction-to-mem", cl::init(false), cl::Hidden, cl::desc("Support for the inner-loop reduction pattern."))
static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)
std::optional< const SCEV * > getAddRecCoefficient(ScalarEvolution &SE, const SCEV *S, const Loop *L)
If \S contains an affine addrec for L, return the step recurrence of it.
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop)
This deals with a corner case when a LCSSA phi node appears in a non-exit block: the outer loop latch...
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static void printDepMatrix(CharMatrix &DepMatrix)
static cl::opt< unsigned int > MaxMemInstrRatio("loop-interchange-max-mem-instr-ratio", cl::init(4), cl::Hidden, cl::desc("Maximum number of load/store instructions squared in relation to " "the total number of instructions. Higher value may lead to more " "interchanges at the cost of compile-time"))
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts, SmallVectorImpl< Instruction * > &HasNoInfInsts)
static bool areInnerLoopExitPHIsSupported(Loop *OuterL, Loop *InnerL, SmallPtrSetImpl< PHINode * > &Reductions, PHINode *LcssaReduction)
We currently only support LCSSA PHI nodes in the inner loop exit if their users are either of the fol...
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)
static bool inThisOrder(const Instruction *Src, const Instruction *Dst)
Return true if Src appears before Dst in the same basic block.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)
Return true if we can vectorize the loop specified by LoopId.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)
static bool checkReductionKind(Loop *L, PHINode *PHI, SmallVectorImpl< Instruction * > &HasNoWrapInsts, SmallVectorImpl< Instruction * > &HasNoInfInsts)
static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
loop Loop Strength Reduction
uint64_t IntrinsicInst * II
SmallVector< Value *, 8 > ValueVector
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
iterator find(const_arg_type_t< KeyT > Val)
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void changeLoopFor(const BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
StringRef getName() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
bool isComplete() const
If the PHI node is complete which means all of its parent's predecessors have incoming value in this ...
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
unsigned getOpcode() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
The main scalar evolution driver.
LLVM_ABI const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Represent a constant reference to a string, i.e.
constexpr size_t size() const
Get the string size.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ BasicBlock
Various leaf nodes.
list_initializer< Ty > list_init(ArrayRef< Ty > Vals)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...