54#define DEBUG_TYPE "loop-interchange" 
   56STATISTIC(LoopsInterchanged, 
"Number of loops interchanged");
 
   60    cl::desc(
"Interchange if you gain more than this number"));
 
   66        "Maximum number of load-store instructions that should be handled " 
   67        "in the dependency matrix. Higher value may lead to more interchanges " 
   68        "at the cost of compile-time"));
 
   82using CharMatrix = std::vector<std::vector<char>>;
 
   97    cl::desc(
"Minimum depth of loop nest considered for the transform"));
 
  102    cl::desc(
"Maximum depth of loop nest considered for the transform"));
 
  108    cl::desc(
"List of profitability heuristics to be used. They are applied in " 
  111                           RuleTy::PerInstrOrderCost,
 
  112                           RuleTy::ForVectorization}),
 
  114                          "Prioritize loop cache cost"),
 
  115               clEnumValN(RuleTy::PerInstrOrderCost, 
"instorder",
 
  116                          "Prioritize the IVs order of each instruction"),
 
  117               clEnumValN(RuleTy::ForVectorization, 
"vectorize",
 
  118                          "Prioritize vectorization"),
 
  120                          "Ignore profitability, force interchange (does not " 
  121                          "work with other options)")));
 
  126  for (RuleTy Rule : Rules) {
 
  127    if (!Set.insert(Rule).second)
 
  129    if (Rule == RuleTy::Ignore)
 
 
  136  for (
auto &Row : DepMatrix) {
 
 
  149  assert(Src->getParent() == Dst->getParent() && Src != Dst &&
 
  150         "Expected Src and Dst to be different instructions in the same BB");
 
  152  bool FoundSrc = 
false;
 
 
  193                    << 
" Loads and Stores to analyze\n");
 
  199                                      L->getStartLoc(), L->getHeader())
 
  200             << 
"Number of loads/stores exceeded, the supported maximum " 
  201                "can be increased with option " 
  202                "-loop-interchange-maxmeminstr-count.";
 
  212  for (
I = MemInstr.
begin(), IE = MemInstr.
end(); 
I != IE; ++
I) {
 
  213    for (J = 
I, JE = MemInstr.
end(); J != JE; ++J) {
 
  214      std::vector<char> Dep;
 
  221      if (
auto D = DI->
depends(Src, Dst)) {
 
  222        assert(
D->isOrdered() && 
"Expected an output, flow or anti dep.");
 
  225        if (
D->normalize(SE))
 
  228                       D->isFlow() ? 
"flow" : 
D->isAnti() ? 
"anti" : 
"output";
 
  229                   dbgs() << 
"Found " << DepType
 
  230                          << 
" dependency between Src and Dst\n" 
  231                          << 
" Src:" << *Src << 
"\n Dst:" << *Dst << 
'\n');
 
  232        unsigned Levels = 
D->getLevels();
 
  234        for (
unsigned II = 1; 
II <= Levels; ++
II) {
 
  241          unsigned Dir = 
D->getDirection(
II);
 
  255        if (
D->isConfused()) {
 
  256          assert(Dep.empty() && 
"Expected empty dependency vector");
 
  257          Dep.assign(Level, 
'*');
 
  260        while (Dep.size() != Level) {
 
  266        if (
all_of(Dep, [](
char C) { 
return C == 
'*'; })) {
 
  269                                            L->getStartLoc(), L->getHeader())
 
  270                   << 
"All loops have dependencies in all directions.";
 
  276        bool IsKnownForward = 
true;
 
  277        if (Src->getParent() != Dst->getParent()) {
 
  281          IsKnownForward = 
false;
 
  287                 "Unexpected instructions");
 
  292          bool IsReversed = 
D->getSrc() != Src;
 
  294            IsKnownForward = 
false;
 
  310          DepMatrix.push_back(Dep);
 
  317          DepMatrix[Ite->second].back() = 
'*';
 
 
  329  for (
auto &Row : DepMatrix)
 
 
  338static std::optional<bool>
 
  351                                      unsigned InnerLoopId,
 
  352                                      unsigned OuterLoopId) {
 
  353  unsigned NumRows = DepMatrix.size();
 
  354  std::vector<char> Cur;
 
  356  for (
unsigned Row = 0; Row < NumRows; ++Row) {
 
  359    Cur = DepMatrix[Row];
 
  372    std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
 
 
  381                    << L.getHeader()->getParent()->getName() << 
" Loop: %" 
  382                    << L.getHeader()->getName() << 
'\n');
 
  383  assert(LoopList.
empty() && 
"LoopList should initially be empty!");
 
  384  Loop *CurrentLoop = &L;
 
  385  const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
 
  386  while (!Vec->empty()) {
 
  390    if (Vec->size() != 1) {
 
  396    CurrentLoop = Vec->front();
 
 
  404  unsigned LoopNestDepth = LoopList.
size();
 
  406    LLVM_DEBUG(
dbgs() << 
"Unsupported depth of loop nest " << LoopNestDepth
 
  414             << 
"Unsupported depth of loop nest, the supported range is [" 
 
  425  for (
Loop *L : LoopList) {
 
  431    if (L->getNumBackEdges() != 1) {
 
  435    if (!L->getExitingBlock()) {
 
 
  446class LoopInterchangeLegality {
 
  448  LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
 
  449                          OptimizationRemarkEmitter *ORE)
 
  450      : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
 
  453  bool canInterchangeLoops(
unsigned InnerLoopId, 
unsigned OuterLoopId,
 
  454                           CharMatrix &DepMatrix);
 
  458  bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
 
  462  bool isLoopStructureUnderstood();
 
  464  bool currentLimitations();
 
  466  const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions()
 const {
 
  467    return OuterInnerReductions;
 
  471    return InnerLoopInductions;
 
  475    return HasNoWrapReductions;
 
  479  bool tightlyNested(Loop *Outer, Loop *Inner);
 
  480  bool containsUnsafeInstructions(BasicBlock *BB);
 
  486  bool findInductionAndReductions(Loop *L,
 
  496  OptimizationRemarkEmitter *ORE;
 
  500  SmallPtrSet<PHINode *, 4> OuterInnerReductions;
 
  508  SmallVector<Instruction *, 4> HasNoWrapReductions;
 
  514class CacheCostManager {
 
  516  LoopStandardAnalysisResults *AR;
 
  521  std::optional<std::unique_ptr<CacheCost>> CC;
 
  525  DenseMap<const Loop *, unsigned> CostMap;
 
  527  void computeIfUnitinialized();
 
  530  CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
 
  532      : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
 
  533  CacheCost *getCacheCost();
 
  534  const DenseMap<const Loop *, unsigned> &getCostMap();
 
  539class LoopInterchangeProfitability {
 
  541  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
 
  542                               OptimizationRemarkEmitter *ORE)
 
  543      : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
 
  546  bool isProfitable(
const Loop *InnerLoop, 
const Loop *OuterLoop,
 
  547                    unsigned InnerLoopId, 
unsigned OuterLoopId,
 
  548                    CharMatrix &DepMatrix, CacheCostManager &CCM);
 
  551  int getInstrOrderCost();
 
  552  std::optional<bool> isProfitablePerLoopCacheAnalysis(
 
  553      const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
 
  554  std::optional<bool> isProfitablePerInstrOrderCost();
 
  555  std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
 
  556                                                   unsigned OuterLoopId,
 
  557                                                   CharMatrix &DepMatrix);
 
  565  OptimizationRemarkEmitter *ORE;
 
  569class LoopInterchangeTransform {
 
  571  LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
 
  572                           LoopInfo *LI, DominatorTree *DT,
 
  573                           const LoopInterchangeLegality &LIL)
 
  574      : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
 
  578  void restructureLoops(Loop *NewInner, Loop *NewOuter,
 
  579                        BasicBlock *OrigInnerPreHeader,
 
  580                        BasicBlock *OrigOuterPreHeader);
 
  581  void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
 
  584  bool adjustLoopLinks();
 
  585  bool adjustLoopBranches();
 
  596  const LoopInterchangeLegality &LIL;
 
  599struct LoopInterchange {
 
  600  ScalarEvolution *SE = 
nullptr;
 
  601  LoopInfo *LI = 
nullptr;
 
  602  DependenceInfo *DI = 
nullptr;
 
  603  DominatorTree *DT = 
nullptr;
 
  604  LoopStandardAnalysisResults *AR = 
nullptr;
 
  607  OptimizationRemarkEmitter *ORE;
 
  609  LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
 
  610                  DominatorTree *DT, LoopStandardAnalysisResults *AR,
 
  611                  OptimizationRemarkEmitter *ORE)
 
  612      : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
 
  615    if (
L->getParentLoop())
 
  617    SmallVector<Loop *, 8> LoopList;
 
  619    return processLoopList(LoopList);
 
  622  bool run(LoopNest &LN) {
 
  623    SmallVector<Loop *, 8> LoopList(LN.
getLoops());
 
  624    for (
unsigned I = 1; 
I < LoopList.size(); ++
I)
 
  625      if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
 
  627    return processLoopList(LoopList);
 
  633    return LoopList.
size() - 1;
 
  636  bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
 
  641           "Unsupported depth of loop nest.");
 
  643    unsigned LoopNestDepth = LoopList.
size();
 
  645    LLVM_DEBUG(
dbgs() << 
"Processing LoopList of size = " << LoopNestDepth
 
  648    CharMatrix DependencyMatrix;
 
  649    Loop *OuterMostLoop = *(LoopList.
begin());
 
  651                                  OuterMostLoop, DI, SE, ORE)) {
 
  666    unsigned SelecLoopId = selectLoopForInterchange(LoopList);
 
  667    CacheCostManager CCM(LoopList[0], AR, DI);
 
  672    for (
unsigned j = SelecLoopId; 
j > 0; 
j--) {
 
  673      bool ChangedPerIter = 
false;
 
  674      for (
unsigned i = SelecLoopId; i > SelecLoopId - 
j; i--) {
 
  676            processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
 
  677        ChangedPerIter |= Interchanged;
 
  688  bool processLoop(SmallVectorImpl<Loop *> &LoopList, 
unsigned InnerLoopId,
 
  689                   unsigned OuterLoopId,
 
  690                   std::vector<std::vector<char>> &DependencyMatrix,
 
  691                   CacheCostManager &CCM) {
 
  692    Loop *OuterLoop = LoopList[OuterLoopId];
 
  693    Loop *InnerLoop = LoopList[InnerLoopId];
 
  695                      << 
" and OuterLoopId = " << OuterLoopId << 
"\n");
 
  696    LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
 
  697    if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
 
  698      LLVM_DEBUG(
dbgs() << 
"Not interchanging loops. Cannot prove legality.\n");
 
  702    LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
 
  703    if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
 
  704                          DependencyMatrix, CCM)) {
 
  710      return OptimizationRemark(
DEBUG_TYPE, 
"Interchanged",
 
  713             << 
"Loop interchanged with enclosing loop.";
 
  716    LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
 
  717    LIT.transform(LIL.getHasNoWrapReductions());
 
  724    std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
 
  737bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB) {
 
  738  return any_of(*BB, [](
const Instruction &
I) {
 
  739    return I.mayHaveSideEffects() || 
I.mayReadFromMemory();
 
  743bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
 
  753  BranchInst *OuterLoopHeaderBI =
 
  755  if (!OuterLoopHeaderBI)
 
  758  for (BasicBlock *Succ : 
successors(OuterLoopHeaderBI))
 
  759    if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader() &&
 
  760        Succ != OuterLoopLatch)
 
  763  LLVM_DEBUG(
dbgs() << 
"Checking instructions in Loop header and Loop latch\n");
 
  766  if (containsUnsafeInstructions(OuterLoopHeader) ||
 
  767      containsUnsafeInstructions(OuterLoopLatch))
 
  773  if (InnerLoopPreHeader != OuterLoopHeader &&
 
  774      containsUnsafeInstructions(InnerLoopPreHeader))
 
  782  if (&SuccInner != OuterLoopLatch) {
 
  784                      << 
" does not lead to the outer loop latch.\n";);
 
  790  if (containsUnsafeInstructions(InnerLoopExit))
 
  798bool LoopInterchangeLegality::isLoopStructureUnderstood() {
 
  800  for (PHINode *InnerInduction : InnerLoopInductions) {
 
  801    unsigned Num = InnerInduction->getNumOperands();
 
  802    for (
unsigned i = 0; i < Num; ++i) {
 
  803      Value *Val = InnerInduction->getOperand(i);
 
  813      if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
 
  814              InnerLoopPreheader &&
 
  828  BranchInst *InnerLoopLatchBI =
 
  832  if (CmpInst *InnerLoopCmp =
 
  834    Value *Op0 = InnerLoopCmp->getOperand(0);
 
  835    Value *Op1 = InnerLoopCmp->getOperand(1);
 
  846    std::function<bool(
Value *)> IsPathToInnerIndVar;
 
  847    IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) -> 
bool {
 
  856        return IsPathToInnerIndVar(
I->getOperand(0));
 
  858        return IsPathToInnerIndVar(
I->getOperand(0)) &&
 
  859               IsPathToInnerIndVar(
I->getOperand(1));
 
  865    if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
 
  873    } 
else if (IsPathToInnerIndVar(Op1) && !
isa<Constant>(Op1)) {
 
  896  if (
PHI->getNumIncomingValues() != 1)
 
 
  911      if (
PHI->getNumIncomingValues() == 1)
 
  963            assert(
I->getOpcode() == OpCode &&
 
  964                   "Expected the instruction to be the reduction operation");
 
  969            if (
I->hasNoSignedWrap() || 
I->hasNoUnsignedWrap())
 
 
  986bool LoopInterchangeLegality::findInductionAndReductions(
 
  988  if (!
L->getLoopLatch() || !
L->getLoopPredecessor())
 
  990  for (PHINode &
PHI : 
L->getHeader()->phis()) {
 
  991    InductionDescriptor 
ID;
 
  998        if (!OuterInnerReductions.count(&
PHI)) {
 
 1000                               "across the outer loop.\n");
 
 1004        assert(
PHI.getNumIncomingValues() == 2 &&
 
 1005               "Phis in loop header should have exactly 2 incoming values");
 
 1009        PHINode *InnerRedPhi =
 
 1015              << 
"Failed to recognize PHI as an induction or reduction.\n");
 
 1018        OuterInnerReductions.insert(&
PHI);
 
 1019        OuterInnerReductions.insert(InnerRedPhi);
 
 1028bool LoopInterchangeLegality::currentLimitations() {
 
 1038        dbgs() << 
"Loops where the latch is not the exiting block are not" 
 1039               << 
" supported currently.\n");
 
 1041      return OptimizationRemarkMissed(
DEBUG_TYPE, 
"ExitingNotLatch",
 
 1044             << 
"Loops where the latch is not the exiting block cannot be" 
 1045                " interchange currently.";
 
 1051  if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
 
 1053        dbgs() << 
"Only outer loops with induction or reduction PHI nodes " 
 1054               << 
"are supported currently.\n");
 
 1056      return OptimizationRemarkMissed(
DEBUG_TYPE, 
"UnsupportedPHIOuter",
 
 1059             << 
"Only outer loops with induction or reduction PHI nodes can be" 
 1060                " interchanged currently.";
 
 1069  Loop *CurLevelLoop = OuterLoop;
 
 1072    CurLevelLoop = CurLevelLoop->
getSubLoops().front();
 
 1073    if (!findInductionAndReductions(CurLevelLoop, Inductions, 
nullptr)) {
 
 1075          dbgs() << 
"Only inner loops with induction or reduction PHI nodes " 
 1076                << 
"are supported currently.\n");
 
 1078        return OptimizationRemarkMissed(
DEBUG_TYPE, 
"UnsupportedPHIInner",
 
 1081              << 
"Only inner loops with induction or reduction PHI nodes can be" 
 1082                  " interchange currently.";
 
 1089  if (!isLoopStructureUnderstood()) {
 
 1092      return OptimizationRemarkMissed(
DEBUG_TYPE, 
"UnsupportedStructureInner",
 
 1095             << 
"Inner loop structure not understood currently.";
 
 1103bool LoopInterchangeLegality::findInductions(
 
 1104    Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
 
 1105  for (PHINode &
PHI : 
L->getHeader()->phis()) {
 
 1106    InductionDescriptor 
ID;
 
 1110  return !Inductions.
empty();
 
 1122    if (
PHI.getNumIncomingValues() > 1)
 
 1125          PHINode *PN = dyn_cast<PHINode>(U);
 
 1127                 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
 
 
 1192    for (
auto *U : 
PHI.users()) {
 
 
 1201bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
 
 1202                                                  unsigned OuterLoopId,
 
 1203                                                  CharMatrix &DepMatrix) {
 
 1205    LLVM_DEBUG(
dbgs() << 
"Failed interchange InnerLoopId = " << InnerLoopId
 
 1206                      << 
" and OuterLoopId = " << OuterLoopId
 
 1207                      << 
" due to dependence\n");
 
 1209      return OptimizationRemarkMissed(
DEBUG_TYPE, 
"Dependence",
 
 1212             << 
"Cannot interchange loops due to dependences.";
 
 1217  for (
auto *BB : OuterLoop->
blocks())
 
 1221        if (CI->onlyWritesMemory())
 
 1224            dbgs() << 
"Loops with call instructions cannot be interchanged " 
 1227          return OptimizationRemarkMissed(
DEBUG_TYPE, 
"CallInst",
 
 1230                 << 
"Cannot interchange loops due to call instruction.";
 
 1236  if (!findInductions(InnerLoop, InnerLoopInductions)) {
 
 1237    LLVM_DEBUG(
dbgs() << 
"Could not find inner loop induction variables.\n");
 
 1242    LLVM_DEBUG(
dbgs() << 
"Found unsupported PHI nodes in inner loop latch.\n");
 
 1244      return OptimizationRemarkMissed(
DEBUG_TYPE, 
"UnsupportedInnerLatchPHI",
 
 1247             << 
"Cannot interchange loops because unsupported PHI nodes found " 
 1248                "in inner loop latch.";
 
 1255  if (currentLimitations()) {
 
 1256    LLVM_DEBUG(
dbgs() << 
"Not legal because of current transform limitation\n");
 
 1261  if (!tightlyNested(OuterLoop, InnerLoop)) {
 
 1264      return OptimizationRemarkMissed(
DEBUG_TYPE, 
"NotTightlyNested",
 
 1267             << 
"Cannot interchange loops because they are not tightly " 
 1274                                     OuterInnerReductions)) {
 
 1275    LLVM_DEBUG(
dbgs() << 
"Found unsupported PHI nodes in inner loop exit.\n");
 
 1277      return OptimizationRemarkMissed(
DEBUG_TYPE, 
"UnsupportedExitPHI",
 
 1280             << 
"Found unsupported PHI node in loop exit.";
 
 1286    LLVM_DEBUG(
dbgs() << 
"Found unsupported PHI nodes in outer loop exit.\n");
 
 1288      return OptimizationRemarkMissed(
DEBUG_TYPE, 
"UnsupportedExitPHI",
 
 1291             << 
"Found unsupported PHI node in loop exit.";
 
 1299void CacheCostManager::computeIfUnitinialized() {
 
 1314    for (
const auto &[Idx, 
Cost] : 
enumerate((*CC)->getLoopCosts()))
 
 1315      CostMap[
Cost.first] = Idx;
 
 1318CacheCost *CacheCostManager::getCacheCost() {
 
 1319  computeIfUnitinialized();
 
 1323const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
 
 1324  computeIfUnitinialized();
 
 1328int LoopInterchangeProfitability::getInstrOrderCost() {
 
 1329  unsigned GoodOrder, BadOrder;
 
 1330  BadOrder = GoodOrder = 0;
 
 1331  for (BasicBlock *BB : InnerLoop->
blocks()) {
 
 1332    for (Instruction &Ins : *BB) {
 
 1334        bool FoundInnerInduction = 
false;
 
 1335        bool FoundOuterInduction = 
false;
 
 1341          const SCEV *OperandVal = SE->
getSCEV(
Op);
 
 1351          if (AR->
getLoop() == InnerLoop) {
 
 1354            FoundInnerInduction = 
true;
 
 1355            if (FoundOuterInduction) {
 
 1365          if (AR->
getLoop() == OuterLoop) {
 
 1368            FoundOuterInduction = 
true;
 
 1369            if (FoundInnerInduction) {
 
 1378  return GoodOrder - BadOrder;
 
 1382LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
 
 1383    const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
 
 1387  auto InnerLoopIt = CostMap.
find(InnerLoop);
 
 1388  if (InnerLoopIt == CostMap.
end())
 
 1389    return std::nullopt;
 
 1390  auto OuterLoopIt = CostMap.
find(OuterLoop);
 
 1391  if (OuterLoopIt == CostMap.
end())
 
 1392    return std::nullopt;
 
 1395    return std::nullopt;
 
 1396  unsigned InnerIndex = InnerLoopIt->second;
 
 1397  unsigned OuterIndex = OuterLoopIt->second;
 
 1399                    << 
", OuterIndex = " << OuterIndex << 
"\n");
 
 1400  assert(InnerIndex != OuterIndex && 
"CostMap should assign unique " 
 1401                                     "numbers to each loop");
 
 1402  return std::optional<bool>(InnerIndex < OuterIndex);
 
 1406LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
 
 1410  int Cost = getInstrOrderCost();
 
 1413    return std::optional<bool>(
true);
 
 1415  return std::nullopt;
 
 1420  for (
const auto &Dep : DepMatrix) {
 
 1421    char Dir = Dep[LoopId];
 
 1422    char DepType = Dep.back();
 
 1423    assert((DepType == 
'<' || DepType == 
'*') &&
 
 1424           "Unexpected element in dependency vector");
 
 1427    if (Dir == 
'=' || Dir == 
'I')
 
 1433    if (Dir == 
'<' && DepType == 
'<')
 
 
 1442std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
 
 1443    unsigned InnerLoopId, 
unsigned OuterLoopId, CharMatrix &DepMatrix) {
 
 1459  return std::nullopt;
 
 1462bool LoopInterchangeProfitability::isProfitable(
 
 1463    const Loop *InnerLoop, 
const Loop *OuterLoop, 
unsigned InnerLoopId,
 
 1464    unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
 
 1470         "Duplicate rules and option 'ignore' are not allowed");
 
 1480  std::optional<bool> shouldInterchange;
 
 1483    case RuleTy::PerLoopCacheAnalysis: {
 
 1484      CacheCost *CC = CCM.getCacheCost();
 
 1485      const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
 
 1486      shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
 
 1489    case RuleTy::PerInstrOrderCost:
 
 1490      shouldInterchange = isProfitablePerInstrOrderCost();
 
 1492    case RuleTy::ForVectorization:
 
 1494          isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
 
 1496    case RuleTy::Ignore:
 
 1503    if (shouldInterchange.has_value())
 
 1507  if (!shouldInterchange.has_value()) {
 
 1509      return OptimizationRemarkMissed(
DEBUG_TYPE, 
"InterchangeNotProfitable",
 
 1512             << 
"Insufficient information to calculate the cost of loop for " 
 1516  } 
else if (!shouldInterchange.value()) {
 
 1518      return OptimizationRemarkMissed(
DEBUG_TYPE, 
"InterchangeNotProfitable",
 
 1521             << 
"Interchanging loops is not considered to improve cache " 
 1522                "locality nor vectorization.";
 
 1529void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
 
 1531  for (Loop *L : *OuterLoop)
 
 1532    if (L == InnerLoop) {
 
 1533      OuterLoop->removeChildLoop(L);
 
 1562void LoopInterchangeTransform::restructureLoops(
 
 1563    Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
 
 1564    BasicBlock *OrigOuterPreHeader) {
 
 1565  Loop *OuterLoopParent = OuterLoop->getParentLoop();
 
 1572  if (OuterLoopParent) {
 
 1574    removeChildLoop(OuterLoopParent, NewInner);
 
 1575    removeChildLoop(NewInner, NewOuter);
 
 1578    removeChildLoop(NewInner, NewOuter);
 
 1586  SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->
blocks());
 
 1590  for (BasicBlock *BB : NewInner->
blocks())
 
 1598  for (BasicBlock *BB : OrigInnerBBs) {
 
 1603    if (BB == OuterHeader || BB == OuterLatch)
 
 1618bool LoopInterchangeTransform::transform(
 
 1620  bool Transformed = 
false;
 
 1625    auto &InductionPHIs = LIL.getInnerLoopInductions();
 
 1626    if (InductionPHIs.empty()) {
 
 1627      LLVM_DEBUG(
dbgs() << 
"Failed to find the point to split loop latch \n");
 
 1631    SmallVector<Instruction *, 8> InnerIndexVarList;
 
 1632    for (PHINode *CurInductionPHI : InductionPHIs) {
 
 1633      if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
 
 1649    SmallSetVector<Instruction *, 4> WorkList;
 
 1651    auto MoveInstructions = [&i, &WorkList, 
this, &InductionPHIs, NewLatch]() {
 
 1652      for (; i < WorkList.
size(); i++) {
 
 1658               "Moving instructions with side-effects may change behavior of " 
 1669        for (
Value *
Op : WorkList[i]->operands()) {
 
 1687    for (Instruction *InnerIndexVar : InnerIndexVarList)
 
 1706  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
 
 1707  if (InnerLoopPreHeader != OuterLoopHeader) {
 
 1708    for (Instruction &
I :
 
 1710                                         std::prev(InnerLoopPreHeader->
end()))))
 
 1714  Transformed |= adjustLoopLinks();
 
 1722  for (Instruction *
Reduction : DropNoWrapInsts) {
 
 1746    I->removeFromParent();
 
 
 1761                            std::vector<DominatorTree::UpdateType> &DTUpdates,
 
 1762                            bool MustUpdateOnce = 
true) {
 
 1764         "BI must jump to OldBB exactly once.");
 
 1773    DTUpdates.push_back(
 
 1774        {DominatorTree::UpdateKind::Insert, BI->
getParent(), NewBB});
 
 1775    DTUpdates.push_back(
 
 1776        {DominatorTree::UpdateKind::Delete, BI->
getParent(), OldBB});
 
 
 1795    assert(
P.getNumIncomingValues() == 1 &&
 
 1796           "Only loops with a single exit are supported!");
 
 1805    if (IncIInnerMost->getParent() != InnerLatch &&
 
 1806        IncIInnerMost->
getParent() != InnerHeader)
 
 1810                  [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
 
 1811                    return (cast<PHINode>(U)->getParent() == OuterHeader &&
 
 1812                            IncI->getParent() == InnerHeader) ||
 
 1813                           cast<PHINode>(U)->getParent() == OuterExit;
 
 1815           "Can only replace phis iff the uses are in the loop nest exit or " 
 1816           "the incoming value is defined in the inner header (it will " 
 1817           "dominate all loop blocks after interchanging)");
 
 1818    P.replaceAllUsesWith(IncI);
 
 1819    P.eraseFromParent();
 
 1847      if (
P.getNumIncomingValues() != 1)
 
 1861        if (Pred == OuterLatch)
 
 1866      P.setIncomingValue(0, NewPhi);
 
 
 1906  if (OuterLoopLatch == InnerLoopExit)
 
 1913    assert(Phi->getNumIncomingValues() == 1 && 
"Single input phi expected");
 
 1914    LLVM_DEBUG(
dbgs() << 
"Removing 1-input phi in non-exit block: " << *Phi
 
 1916    Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
 
 1917    Phi->eraseFromParent();
 
 
 1921bool LoopInterchangeTransform::adjustLoopBranches() {
 
 1923  std::vector<DominatorTree::UpdateType> DTUpdates;
 
 1925  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
 
 1928  assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
 
 1929         InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
 
 1930         InnerLoopPreHeader && 
"Guaranteed by loop-simplify form");
 
 1940    OuterLoopPreHeader =
 
 1942  if (InnerLoopPreHeader == OuterLoop->getHeader())
 
 1943    InnerLoopPreHeader =
 
 1948  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
 
 1950  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
 
 1957  BranchInst *OuterLoopLatchBI =
 
 1959  BranchInst *InnerLoopLatchBI =
 
 1961  BranchInst *OuterLoopHeaderBI =
 
 1963  BranchInst *InnerLoopHeaderBI =
 
 1966  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
 
 1967      !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
 
 1971  BranchInst *InnerLoopLatchPredecessorBI =
 
 1973  BranchInst *OuterLoopPredecessorBI =
 
 1976  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
 
 1979  if (!InnerLoopHeaderSuccessor)
 
 1987                  InnerLoopPreHeader, DTUpdates, 
false);
 
 1997                  InnerLoopHeaderSuccessor, DTUpdates,
 
 2005                  OuterLoopPreHeader, DTUpdates);
 
 2008  if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
 
 2009    InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
 
 2011    InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
 
 2014                  InnerLoopLatchSuccessor, DTUpdates);
 
 2016  if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
 
 2017    OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
 
 2019    OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
 
 2022                  OuterLoopLatchSuccessor, DTUpdates);
 
 2023  updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
 
 2027  restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
 
 2028                   OuterLoopPreHeader);
 
 2030  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
 
 2031                OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
 
 2037  auto &OuterInnerReductions = LIL.getOuterInnerReductions();
 
 2040  for (PHINode &
PHI : InnerLoopHeader->
phis())
 
 2041    if (OuterInnerReductions.contains(&
PHI))
 
 2044  for (PHINode &
PHI : OuterLoopHeader->
phis())
 
 2045    if (OuterInnerReductions.contains(&
PHI))
 
 2051  for (PHINode *
PHI : OuterLoopPHIs) {
 
 2054    assert(OuterInnerReductions.count(
PHI) && 
"Expected a reduction PHI node");
 
 2056  for (PHINode *
PHI : InnerLoopPHIs) {
 
 2059    assert(OuterInnerReductions.count(
PHI) && 
"Expected a reduction PHI node");
 
 2072  SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
 
 2073  for (Instruction &
I :
 
 2081bool LoopInterchangeTransform::adjustLoopLinks() {
 
 2083  bool Changed = adjustLoopBranches();
 
 2088    BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
 
 2113    LLVM_DEBUG(
dbgs() << 
"Not valid loop candidate for interchange\n");
 
 2121           << 
"Computed dependence info, invoking the transform.";
 
 2125  if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT, &AR, &ORE).run(LN))
 
 2127  U.markLoopNestChanged(
true);
 
 
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
ReachingDefInfo InstSet InstSet & Ignore
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the interface for the loop cache analysis.
SmallVector< Loop *, 4 > LoopVector
Loop::LoopBounds::Direction Direction
static cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time"))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts)
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop)
This deals with a corner case when a LCSSA phi node appears in a non-exit block: the outer loop latch...
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static void printDepMatrix(CharMatrix &DepMatrix)
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)
static bool inThisOrder(const Instruction *Src, const Instruction *Dst)
Return true if Src appears before Dst in the same basic block.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)
Return true if we can vectorize the loop specified by LoopId.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)
static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::ZeroOrMore, cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))
static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
loop Loop Strength Reduction
uint64_t IntrinsicInst * II
SmallVector< Value *, 8 > ValueVector
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
iterator find(const_arg_type_t< KeyT > Val)
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
unsigned getOpcode() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
RecurKind getRecurrenceKind() const
const Loop * getLoop() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
StringRef - Represent a constant reference to a string, i.e.
constexpr size_t size() const
size - Get the string size.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
iterator_range< user_iterator > users()
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
list_initializer< Ty > list_init(ArrayRef< Ty > Vals)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto map_range(ContainerTy &&C, FuncTy F)
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...