LLVM  14.0.0git
LoopInterchange.cpp
Go to the documentation of this file.
1 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This Pass handles loop interchange transform.
10 // This pass interchanges loops to provide a more cache-friendly memory access
11 // patterns.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/LoopPass.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DiagnosticInfo.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/User.h"
38 #include "llvm/IR/Value.h"
39 #include "llvm/InitializePasses.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/Debug.h"
46 #include "llvm/Transforms/Scalar.h"
47 #include "llvm/Transforms/Utils.h"
50 #include <cassert>
51 #include <utility>
52 #include <vector>
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "loop-interchange"
57 
58 STATISTIC(LoopsInterchanged, "Number of loops interchanged");
59 
61  "loop-interchange-threshold", cl::init(0), cl::Hidden,
62  cl::desc("Interchange if you gain more than this number"));
63 
64 namespace {
65 
66 using LoopVector = SmallVector<Loop *, 8>;
67 
68 // TODO: Check if we can use a sparse matrix here.
69 using CharMatrix = std::vector<std::vector<char>>;
70 
71 } // end anonymous namespace
72 
73 // Maximum number of dependencies that can be handled in the dependency matrix.
74 static const unsigned MaxMemInstrCount = 100;
75 
76 // Maximum loop depth supported.
77 static const unsigned MaxLoopNestDepth = 10;
78 
79 #ifdef DUMP_DEP_MATRICIES
80 static void printDepMatrix(CharMatrix &DepMatrix) {
81  for (auto &Row : DepMatrix) {
82  for (auto D : Row)
83  LLVM_DEBUG(dbgs() << D << " ");
84  LLVM_DEBUG(dbgs() << "\n");
85  }
86 }
87 #endif
88 
89 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
90  Loop *L, DependenceInfo *DI) {
91  using ValueVector = SmallVector<Value *, 16>;
92 
93  ValueVector MemInstr;
94 
95  // For each block.
96  for (BasicBlock *BB : L->blocks()) {
97  // Scan the BB and collect legal loads and stores.
98  for (Instruction &I : *BB) {
99  if (!isa<Instruction>(I))
100  return false;
101  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
102  if (!Ld->isSimple())
103  return false;
104  MemInstr.push_back(&I);
105  } else if (auto *St = dyn_cast<StoreInst>(&I)) {
106  if (!St->isSimple())
107  return false;
108  MemInstr.push_back(&I);
109  }
110  }
111  }
112 
113  LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
114  << " Loads and Stores to analyze\n");
115 
116  ValueVector::iterator I, IE, J, JE;
117 
118  for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
119  for (J = I, JE = MemInstr.end(); J != JE; ++J) {
120  std::vector<char> Dep;
121  Instruction *Src = cast<Instruction>(*I);
122  Instruction *Dst = cast<Instruction>(*J);
123  if (Src == Dst)
124  continue;
125  // Ignore Input dependencies.
126  if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
127  continue;
128  // Track Output, Flow, and Anti dependencies.
129  if (auto D = DI->depends(Src, Dst, true)) {
130  assert(D->isOrdered() && "Expected an output, flow or anti dep.");
131  LLVM_DEBUG(StringRef DepType =
132  D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
133  dbgs() << "Found " << DepType
134  << " dependency between Src and Dst\n"
135  << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
136  unsigned Levels = D->getLevels();
137  char Direction;
138  for (unsigned II = 1; II <= Levels; ++II) {
139  const SCEV *Distance = D->getDistance(II);
140  const SCEVConstant *SCEVConst =
141  dyn_cast_or_null<SCEVConstant>(Distance);
142  if (SCEVConst) {
143  const ConstantInt *CI = SCEVConst->getValue();
144  if (CI->isNegative())
145  Direction = '<';
146  else if (CI->isZero())
147  Direction = '=';
148  else
149  Direction = '>';
150  Dep.push_back(Direction);
151  } else if (D->isScalar(II)) {
152  Direction = 'S';
153  Dep.push_back(Direction);
154  } else {
155  unsigned Dir = D->getDirection(II);
156  if (Dir == Dependence::DVEntry::LT ||
158  Direction = '<';
159  else if (Dir == Dependence::DVEntry::GT ||
161  Direction = '>';
162  else if (Dir == Dependence::DVEntry::EQ)
163  Direction = '=';
164  else
165  Direction = '*';
166  Dep.push_back(Direction);
167  }
168  }
169  while (Dep.size() != Level) {
170  Dep.push_back('I');
171  }
172 
173  DepMatrix.push_back(Dep);
174  if (DepMatrix.size() > MaxMemInstrCount) {
175  LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
176  << " dependencies inside loop\n");
177  return false;
178  }
179  }
180  }
181  }
182 
183  return true;
184 }
185 
186 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
187 // matrix by exchanging the two columns.
188 static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
189  unsigned ToIndx) {
190  for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
191  std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
192 }
193 
194 // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
195 // '>'
196 static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
197  unsigned Column) {
198  for (unsigned i = 0; i <= Column; ++i) {
199  if (DepMatrix[Row][i] == '<')
200  return false;
201  if (DepMatrix[Row][i] == '>')
202  return true;
203  }
204  // All dependencies were '=','S' or 'I'
205  return false;
206 }
207 
208 // Checks if no dependence exist in the dependency matrix in Row before Column.
209 static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
210  unsigned Column) {
211  for (unsigned i = 0; i < Column; ++i) {
212  if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
213  DepMatrix[Row][i] != 'I')
214  return false;
215  }
216  return true;
217 }
218 
219 static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
220  unsigned OuterLoopId, char InnerDep,
221  char OuterDep) {
222  if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
223  return false;
224 
225  if (InnerDep == OuterDep)
226  return true;
227 
228  // It is legal to interchange if and only if after interchange no row has a
229  // '>' direction as the leftmost non-'='.
230 
231  if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
232  return true;
233 
234  if (InnerDep == '<')
235  return true;
236 
237  if (InnerDep == '>') {
238  // If OuterLoopId represents outermost loop then interchanging will make the
239  // 1st dependency as '>'
240  if (OuterLoopId == 0)
241  return false;
242 
243  // If all dependencies before OuterloopId are '=','S'or 'I'. Then
244  // interchanging will result in this row having an outermost non '='
245  // dependency of '>'
246  if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
247  return true;
248  }
249 
250  return false;
251 }
252 
253 // Checks if it is legal to interchange 2 loops.
254 // [Theorem] A permutation of the loops in a perfect nest is legal if and only
255 // if the direction matrix, after the same permutation is applied to its
256 // columns, has no ">" direction as the leftmost non-"=" direction in any row.
257 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
258  unsigned InnerLoopId,
259  unsigned OuterLoopId) {
260  unsigned NumRows = DepMatrix.size();
261  // For each row check if it is valid to interchange.
262  for (unsigned Row = 0; Row < NumRows; ++Row) {
263  char InnerDep = DepMatrix[Row][InnerLoopId];
264  char OuterDep = DepMatrix[Row][OuterLoopId];
265  if (InnerDep == '*' || OuterDep == '*')
266  return false;
267  if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
268  return false;
269  }
270  return true;
271 }
272 
273 static LoopVector populateWorklist(Loop &L) {
274  LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
275  << L.getHeader()->getParent()->getName() << " Loop: %"
276  << L.getHeader()->getName() << '\n');
277  LoopVector LoopList;
278  Loop *CurrentLoop = &L;
279  const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
280  while (!Vec->empty()) {
281  // The current loop has multiple subloops in it hence it is not tightly
282  // nested.
283  // Discard all loops above it added into Worklist.
284  if (Vec->size() != 1)
285  return {};
286 
287  LoopList.push_back(CurrentLoop);
288  CurrentLoop = Vec->front();
289  Vec = &CurrentLoop->getSubLoops();
290  }
291  LoopList.push_back(CurrentLoop);
292  return LoopList;
293 }
294 
296  PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
297  if (InnerIndexVar)
298  return InnerIndexVar;
299  if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
300  return nullptr;
301  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
302  PHINode *PhiVar = cast<PHINode>(I);
303  Type *PhiTy = PhiVar->getType();
304  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
305  !PhiTy->isPointerTy())
306  return nullptr;
307  const SCEVAddRecExpr *AddRec =
308  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
309  if (!AddRec || !AddRec->isAffine())
310  continue;
311  const SCEV *Step = AddRec->getStepRecurrence(*SE);
312  if (!isa<SCEVConstant>(Step))
313  continue;
314  // Found the induction variable.
315  // FIXME: Handle loops with more than one induction variable. Note that,
316  // currently, legality makes sure we have only one induction variable.
317  return PhiVar;
318  }
319  return nullptr;
320 }
321 
322 namespace {
323 
324 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
325 class LoopInterchangeLegality {
326 public:
327  LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
329  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
330 
331  /// Check if the loops can be interchanged.
332  bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
333  CharMatrix &DepMatrix);
334 
335  /// Check if the loop structure is understood. We do not handle triangular
336  /// loops for now.
337  bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
338 
339  bool currentLimitations();
340 
341  const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
342  return OuterInnerReductions;
343  }
344 
345 private:
346  bool tightlyNested(Loop *Outer, Loop *Inner);
347  bool containsUnsafeInstructions(BasicBlock *BB);
348 
349  /// Discover induction and reduction PHIs in the header of \p L. Induction
350  /// PHIs are added to \p Inductions, reductions are added to
351  /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
352  /// to be passed as \p InnerLoop.
353  bool findInductionAndReductions(Loop *L,
354  SmallVector<PHINode *, 8> &Inductions,
355  Loop *InnerLoop);
356 
357  Loop *OuterLoop;
358  Loop *InnerLoop;
359 
360  ScalarEvolution *SE;
361 
362  /// Interface to emit optimization remarks.
364 
365  /// Set of reduction PHIs taking part of a reduction across the inner and
366  /// outer loop.
367  SmallPtrSet<PHINode *, 4> OuterInnerReductions;
368 };
369 
370 /// LoopInterchangeProfitability checks if it is profitable to interchange the
371 /// loop.
372 class LoopInterchangeProfitability {
373 public:
374  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
376  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
377 
378  /// Check if the loop interchange is profitable.
379  bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
380  CharMatrix &DepMatrix);
381 
382 private:
383  int getInstrOrderCost();
384 
385  Loop *OuterLoop;
386  Loop *InnerLoop;
387 
388  /// Scev analysis.
389  ScalarEvolution *SE;
390 
391  /// Interface to emit optimization remarks.
393 };
394 
395 /// LoopInterchangeTransform interchanges the loop.
396 class LoopInterchangeTransform {
397 public:
398  LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
399  LoopInfo *LI, DominatorTree *DT,
400  const LoopInterchangeLegality &LIL)
401  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
402 
403  /// Interchange OuterLoop and InnerLoop.
404  bool transform();
405  void restructureLoops(Loop *NewInner, Loop *NewOuter,
406  BasicBlock *OrigInnerPreHeader,
407  BasicBlock *OrigOuterPreHeader);
408  void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
409 
410 private:
411  bool adjustLoopLinks();
412  bool adjustLoopBranches();
413 
414  Loop *OuterLoop;
415  Loop *InnerLoop;
416 
417  /// Scev analysis.
418  ScalarEvolution *SE;
419 
420  LoopInfo *LI;
421  DominatorTree *DT;
422 
423  const LoopInterchangeLegality &LIL;
424 };
425 
426 struct LoopInterchange {
427  ScalarEvolution *SE = nullptr;
428  LoopInfo *LI = nullptr;
429  DependenceInfo *DI = nullptr;
430  DominatorTree *DT = nullptr;
431 
432  /// Interface to emit optimization remarks.
434 
435  LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
437  : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
438 
439  bool run(Loop *L) {
440  if (L->getParentLoop())
441  return false;
442 
443  return processLoopList(populateWorklist(*L));
444  }
445 
446  bool run(LoopNest &LN) {
447  const auto &LoopList = LN.getLoops();
448  for (unsigned I = 1; I < LoopList.size(); ++I)
449  if (LoopList[I]->getParentLoop() != LoopList[I - 1])
450  return false;
451  return processLoopList(LoopList);
452  }
453 
454  bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
455  for (Loop *L : LoopList) {
456  const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
457  if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
458  LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
459  return false;
460  }
461  if (L->getNumBackEdges() != 1) {
462  LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
463  return false;
464  }
465  if (!L->getExitingBlock()) {
466  LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
467  return false;
468  }
469  }
470  return true;
471  }
472 
473  unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
474  // TODO: Add a better heuristic to select the loop to be interchanged based
475  // on the dependence matrix. Currently we select the innermost loop.
476  return LoopList.size() - 1;
477  }
478 
479  bool processLoopList(ArrayRef<Loop *> LoopList) {
480  bool Changed = false;
481  unsigned LoopNestDepth = LoopList.size();
482  if (LoopNestDepth < 2) {
483  LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
484  return false;
485  }
486  if (LoopNestDepth > MaxLoopNestDepth) {
487  LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
488  << MaxLoopNestDepth << "\n");
489  return false;
490  }
491  if (!isComputableLoopNest(LoopList)) {
492  LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
493  return false;
494  }
495 
496  LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
497  << "\n");
498 
499  CharMatrix DependencyMatrix;
500  Loop *OuterMostLoop = *(LoopList.begin());
501  if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
502  OuterMostLoop, DI)) {
503  LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
504  return false;
505  }
506 #ifdef DUMP_DEP_MATRICIES
507  LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
508  printDepMatrix(DependencyMatrix);
509 #endif
510 
511  // Get the Outermost loop exit.
512  BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
513  if (!LoopNestExit) {
514  LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
515  return false;
516  }
517 
518  unsigned SelecLoopId = selectLoopForInterchange(LoopList);
519  // Move the selected loop outwards to the best possible position.
520  Loop *LoopToBeInterchanged = LoopList[SelecLoopId];
521  for (unsigned i = SelecLoopId; i > 0; i--) {
522  bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i,
523  i - 1, DependencyMatrix);
524  if (!Interchanged)
525  return Changed;
526  // Update the DependencyMatrix
527  interChangeDependencies(DependencyMatrix, i, i - 1);
528 #ifdef DUMP_DEP_MATRICIES
529  LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
530  printDepMatrix(DependencyMatrix);
531 #endif
532  Changed |= Interchanged;
533  }
534  return Changed;
535  }
536 
537  bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
538  unsigned OuterLoopId,
539  std::vector<std::vector<char>> &DependencyMatrix) {
540  LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
541  << " and OuterLoopId = " << OuterLoopId << "\n");
542  LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
543  if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
544  LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
545  return false;
546  }
547  LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
548  LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
549  if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
550  LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
551  return false;
552  }
553 
554  ORE->emit([&]() {
555  return OptimizationRemark(DEBUG_TYPE, "Interchanged",
556  InnerLoop->getStartLoc(),
557  InnerLoop->getHeader())
558  << "Loop interchanged with enclosing loop.";
559  });
560 
561  LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
562  LIT.transform();
563  LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
564  LoopsInterchanged++;
565 
566  assert(InnerLoop->isLCSSAForm(*DT) &&
567  "Inner loop not left in LCSSA form after loop interchange!");
568  assert(OuterLoop->isLCSSAForm(*DT) &&
569  "Outer loop not left in LCSSA form after loop interchange!");
570 
571  return true;
572  }
573 };
574 
575 } // end anonymous namespace
576 
577 bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
578  return any_of(*BB, [](const Instruction &I) {
579  return I.mayHaveSideEffects() || I.mayReadFromMemory();
580  });
581 }
582 
583 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
584  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
585  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
586  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
587 
588  LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
589 
590  // A perfectly nested loop will not have any branch in between the outer and
591  // inner block i.e. outer header will branch to either inner preheader and
592  // outerloop latch.
593  BranchInst *OuterLoopHeaderBI =
594  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
595  if (!OuterLoopHeaderBI)
596  return false;
597 
598  for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
599  if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
600  Succ != OuterLoopLatch)
601  return false;
602 
603  LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
604  // We do not have any basic block in between now make sure the outer header
605  // and outer loop latch doesn't contain any unsafe instructions.
606  if (containsUnsafeInstructions(OuterLoopHeader) ||
607  containsUnsafeInstructions(OuterLoopLatch))
608  return false;
609 
610  // Also make sure the inner loop preheader does not contain any unsafe
611  // instructions. Note that all instructions in the preheader will be moved to
612  // the outer loop header when interchanging.
613  if (InnerLoopPreHeader != OuterLoopHeader &&
614  containsUnsafeInstructions(InnerLoopPreHeader))
615  return false;
616 
617  BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
618  // Ensure the inner loop exit block flows to the outer loop latch possibly
619  // through empty blocks.
620  const BasicBlock &SuccInner =
621  LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
622  if (&SuccInner != OuterLoopLatch) {
623  LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
624  << " does not lead to the outer loop latch.\n";);
625  return false;
626  }
627  // The inner loop exit block does flow to the outer loop latch and not some
628  // other BBs, now make sure it contains safe instructions, since it will be
629  // moved into the (new) inner loop after interchange.
630  if (containsUnsafeInstructions(InnerLoopExit))
631  return false;
632 
633  LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
634  // We have a perfect loop nest.
635  return true;
636 }
637 
638 bool LoopInterchangeLegality::isLoopStructureUnderstood(
639  PHINode *InnerInduction) {
640  unsigned Num = InnerInduction->getNumOperands();
641  BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
642  for (unsigned i = 0; i < Num; ++i) {
643  Value *Val = InnerInduction->getOperand(i);
644  if (isa<Constant>(Val))
645  continue;
646  Instruction *I = dyn_cast<Instruction>(Val);
647  if (!I)
648  return false;
649  // TODO: Handle triangular loops.
650  // e.g. for(int i=0;i<N;i++)
651  // for(int j=i;j<N;j++)
652  unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
653  if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
654  InnerLoopPreheader &&
655  !OuterLoop->isLoopInvariant(I)) {
656  return false;
657  }
658  }
659 
660  // TODO: Handle triangular loops of another form.
661  // e.g. for(int i=0;i<N;i++)
662  // for(int j=0;j<i;j++)
663  // or,
664  // for(int i=0;i<N;i++)
665  // for(int j=0;j*i<N;j++)
666  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
667  BranchInst *InnerLoopLatchBI =
668  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
669  if (!InnerLoopLatchBI->isConditional())
670  return false;
671  if (CmpInst *InnerLoopCmp =
672  dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
673  Value *Op0 = InnerLoopCmp->getOperand(0);
674  Value *Op1 = InnerLoopCmp->getOperand(1);
675 
676  // LHS and RHS of the inner loop exit condition, e.g.,
677  // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
678  Value *Left = nullptr;
679  Value *Right = nullptr;
680 
681  // Check if V only involves inner loop induction variable.
682  // Return true if V is InnerInduction, or a cast from
683  // InnerInduction, or a binary operator that involves
684  // InnerInduction and a constant.
685  std::function<bool(Value *)> IsPathToIndVar;
686  IsPathToIndVar = [&InnerInduction, &IsPathToIndVar](Value *V) -> bool {
687  if (V == InnerInduction)
688  return true;
689  if (isa<Constant>(V))
690  return true;
691  Instruction *I = dyn_cast<Instruction>(V);
692  if (!I)
693  return false;
694  if (isa<CastInst>(I))
695  return IsPathToIndVar(I->getOperand(0));
696  if (isa<BinaryOperator>(I))
697  return IsPathToIndVar(I->getOperand(0)) &&
698  IsPathToIndVar(I->getOperand(1));
699  return false;
700  };
701 
702  if (IsPathToIndVar(Op0) && !isa<Constant>(Op0)) {
703  Left = Op0;
704  Right = Op1;
705  } else if (IsPathToIndVar(Op1) && !isa<Constant>(Op1)) {
706  Left = Op1;
707  Right = Op0;
708  }
709 
710  if (Left == nullptr)
711  return false;
712 
713  const SCEV *S = SE->getSCEV(Right);
714  if (!SE->isLoopInvariant(S, OuterLoop))
715  return false;
716  }
717 
718  return true;
719 }
720 
721 // If SV is a LCSSA PHI node with a single incoming value, return the incoming
722 // value.
723 static Value *followLCSSA(Value *SV) {
724  PHINode *PHI = dyn_cast<PHINode>(SV);
725  if (!PHI)
726  return SV;
727 
728  if (PHI->getNumIncomingValues() != 1)
729  return SV;
730  return followLCSSA(PHI->getIncomingValue(0));
731 }
732 
733 // Check V's users to see if it is involved in a reduction in L.
735  // Reduction variables cannot be constants.
736  if (isa<Constant>(V))
737  return nullptr;
738 
739  for (Value *User : V->users()) {
740  if (PHINode *PHI = dyn_cast<PHINode>(User)) {
741  if (PHI->getNumIncomingValues() == 1)
742  continue;
744  if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
745  return PHI;
746  return nullptr;
747  }
748  }
749 
750  return nullptr;
751 }
752 
753 bool LoopInterchangeLegality::findInductionAndReductions(
754  Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
755  if (!L->getLoopLatch() || !L->getLoopPredecessor())
756  return false;
757  for (PHINode &PHI : L->getHeader()->phis()) {
760  if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
761  Inductions.push_back(&PHI);
762  else {
763  // PHIs in inner loops need to be part of a reduction in the outer loop,
764  // discovered when checking the PHIs of the outer loop earlier.
765  if (!InnerLoop) {
766  if (!OuterInnerReductions.count(&PHI)) {
767  LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
768  "across the outer loop.\n");
769  return false;
770  }
771  } else {
772  assert(PHI.getNumIncomingValues() == 2 &&
773  "Phis in loop header should have exactly 2 incoming values");
774  // Check if we have a PHI node in the outer loop that has a reduction
775  // result from the inner loop as an incoming value.
776  Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
777  PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
778  if (!InnerRedPhi ||
779  !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
780  LLVM_DEBUG(
781  dbgs()
782  << "Failed to recognize PHI as an induction or reduction.\n");
783  return false;
784  }
785  OuterInnerReductions.insert(&PHI);
786  OuterInnerReductions.insert(InnerRedPhi);
787  }
788  }
789  }
790  return true;
791 }
792 
793 // This function indicates the current limitations in the transform as a result
794 // of which we do not proceed.
795 bool LoopInterchangeLegality::currentLimitations() {
796  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
797  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
798 
799  // transform currently expects the loop latches to also be the exiting
800  // blocks.
801  if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
802  OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
803  !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
804  !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
805  LLVM_DEBUG(
806  dbgs() << "Loops where the latch is not the exiting block are not"
807  << " supported currently.\n");
808  ORE->emit([&]() {
809  return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
810  OuterLoop->getStartLoc(),
811  OuterLoop->getHeader())
812  << "Loops where the latch is not the exiting block cannot be"
813  " interchange currently.";
814  });
815  return true;
816  }
817 
818  PHINode *InnerInductionVar;
819  SmallVector<PHINode *, 8> Inductions;
820  if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
821  LLVM_DEBUG(
822  dbgs() << "Only outer loops with induction or reduction PHI nodes "
823  << "are supported currently.\n");
824  ORE->emit([&]() {
825  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
826  OuterLoop->getStartLoc(),
827  OuterLoop->getHeader())
828  << "Only outer loops with induction or reduction PHI nodes can be"
829  " interchanged currently.";
830  });
831  return true;
832  }
833 
834  // TODO: Currently we handle only loops with 1 induction variable.
835  if (Inductions.size() != 1) {
836  LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
837  << "supported currently.\n");
838  ORE->emit([&]() {
839  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
840  OuterLoop->getStartLoc(),
841  OuterLoop->getHeader())
842  << "Only outer loops with 1 induction variable can be "
843  "interchanged currently.";
844  });
845  return true;
846  }
847 
848  Inductions.clear();
849  if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
850  LLVM_DEBUG(
851  dbgs() << "Only inner loops with induction or reduction PHI nodes "
852  << "are supported currently.\n");
853  ORE->emit([&]() {
854  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
855  InnerLoop->getStartLoc(),
856  InnerLoop->getHeader())
857  << "Only inner loops with induction or reduction PHI nodes can be"
858  " interchange currently.";
859  });
860  return true;
861  }
862 
863  // TODO: Currently we handle only loops with 1 induction variable.
864  if (Inductions.size() != 1) {
865  LLVM_DEBUG(
866  dbgs() << "We currently only support loops with 1 induction variable."
867  << "Failed to interchange due to current limitation\n");
868  ORE->emit([&]() {
869  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
870  InnerLoop->getStartLoc(),
871  InnerLoop->getHeader())
872  << "Only inner loops with 1 induction variable can be "
873  "interchanged currently.";
874  });
875  return true;
876  }
877  InnerInductionVar = Inductions.pop_back_val();
878 
879  // TODO: Triangular loops are not handled for now.
880  if (!isLoopStructureUnderstood(InnerInductionVar)) {
881  LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
882  ORE->emit([&]() {
883  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
884  InnerLoop->getStartLoc(),
885  InnerLoop->getHeader())
886  << "Inner loop structure not understood currently.";
887  });
888  return true;
889  }
890 
891  // TODO: Current limitation: Since we split the inner loop latch at the point
892  // were induction variable is incremented (induction.next); We cannot have
893  // more than 1 user of induction.next since it would result in broken code
894  // after split.
895  // e.g.
896  // for(i=0;i<N;i++) {
897  // for(j = 0;j<M;j++) {
898  // A[j+1][i+2] = A[j][i]+k;
899  // }
900  // }
901  Instruction *InnerIndexVarInc = nullptr;
902  if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
903  InnerIndexVarInc =
904  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
905  else
906  InnerIndexVarInc =
907  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
908 
909  if (!InnerIndexVarInc) {
910  LLVM_DEBUG(
911  dbgs() << "Did not find an instruction to increment the induction "
912  << "variable.\n");
913  ORE->emit([&]() {
914  return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner",
915  InnerLoop->getStartLoc(),
916  InnerLoop->getHeader())
917  << "The inner loop does not increment the induction variable.";
918  });
919  return true;
920  }
921 
922  // Since we split the inner loop latch on this induction variable. Make sure
923  // we do not have any instruction between the induction variable and branch
924  // instruction.
925 
926  bool FoundInduction = false;
927  for (const Instruction &I :
928  llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) {
929  if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) ||
930  isa<ZExtInst>(I))
931  continue;
932 
933  // We found an instruction. If this is not induction variable then it is not
934  // safe to split this loop latch.
935  if (!I.isIdenticalTo(InnerIndexVarInc)) {
936  LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction "
937  << "variable increment and branch.\n");
938  ORE->emit([&]() {
940  DEBUG_TYPE, "UnsupportedInsBetweenInduction",
941  InnerLoop->getStartLoc(), InnerLoop->getHeader())
942  << "Found unsupported instruction between induction variable "
943  "increment and branch.";
944  });
945  return true;
946  }
947 
948  FoundInduction = true;
949  break;
950  }
951  // The loop latch ended and we didn't find the induction variable return as
952  // current limitation.
953  if (!FoundInduction) {
954  LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n");
955  ORE->emit([&]() {
956  return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable",
957  InnerLoop->getStartLoc(),
958  InnerLoop->getHeader())
959  << "Did not find the induction variable.";
960  });
961  return true;
962  }
963  return false;
964 }
965 
966 // We currently only support LCSSA PHI nodes in the inner loop exit, if their
967 // users are either reduction PHIs or PHIs outside the outer loop (which means
968 // the we are only interested in the final value after the loop).
969 static bool
971  SmallPtrSetImpl<PHINode *> &Reductions) {
972  BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
973  for (PHINode &PHI : InnerExit->phis()) {
974  // Reduction lcssa phi will have only 1 incoming block that from loop latch.
975  if (PHI.getNumIncomingValues() > 1)
976  return false;
977  if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
978  PHINode *PN = dyn_cast<PHINode>(U);
979  return !PN ||
980  (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
981  })) {
982  return false;
983  }
984  }
985  return true;
986 }
987 
988 // We currently support LCSSA PHI nodes in the outer loop exit, if their
989 // incoming values do not come from the outer loop latch or if the
990 // outer loop latch has a single predecessor. In that case, the value will
991 // be available if both the inner and outer loop conditions are true, which
992 // will still be true after interchanging. If we have multiple predecessor,
993 // that may not be the case, e.g. because the outer loop latch may be executed
994 // if the inner loop is not executed.
995 static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
996  BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
997  for (PHINode &PHI : LoopNestExit->phis()) {
998  // FIXME: We currently are not able to detect floating point reductions
999  // and have to use floating point PHIs as a proxy to prevent
1000  // interchanging in the presence of floating point reductions.
1001  if (PHI.getType()->isFloatingPointTy())
1002  return false;
1003  for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
1004  Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
1005  if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
1006  continue;
1007 
1008  // The incoming value is defined in the outer loop latch. Currently we
1009  // only support that in case the outer loop latch has a single predecessor.
1010  // This guarantees that the outer loop latch is executed if and only if
1011  // the inner loop is executed (because tightlyNested() guarantees that the
1012  // outer loop header only branches to the inner loop or the outer loop
1013  // latch).
1014  // FIXME: We could weaken this logic and allow multiple predecessors,
1015  // if the values are produced outside the loop latch. We would need
1016  // additional logic to update the PHI nodes in the exit block as
1017  // well.
1018  if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
1019  return false;
1020  }
1021  }
1022  return true;
1023 }
1024 
1025 // In case of multi-level nested loops, it may occur that lcssa phis exist in
1026 // the latch of InnerLoop, i.e., when defs of the incoming values are further
1027 // inside the loopnest. Sometimes those incoming values are not available
1028 // after interchange, since the original inner latch will become the new outer
1029 // latch which may have predecessor paths that do not include those incoming
1030 // values.
1031 // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
1032 // multi-level loop nests.
1033 static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
1034  if (InnerLoop->getSubLoops().empty())
1035  return true;
1036  // If the original outer latch has only one predecessor, then values defined
1037  // further inside the looploop, e.g., in the innermost loop, will be available
1038  // at the new outer latch after interchange.
1039  if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
1040  return true;
1041 
1042  // The outer latch has more than one predecessors, i.e., the inner
1043  // exit and the inner header.
1044  // PHI nodes in the inner latch are lcssa phis where the incoming values
1045  // are defined further inside the loopnest. Check if those phis are used
1046  // in the original inner latch. If that is the case then bail out since
1047  // those incoming values may not be available at the new outer latch.
1048  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1049  for (PHINode &PHI : InnerLoopLatch->phis()) {
1050  for (auto *U : PHI.users()) {
1051  Instruction *UI = cast<Instruction>(U);
1052  if (InnerLoopLatch == UI->getParent())
1053  return false;
1054  }
1055  }
1056  return true;
1057 }
1058 
1059 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
1060  unsigned OuterLoopId,
1061  CharMatrix &DepMatrix) {
1062  if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
1063  LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
1064  << " and OuterLoopId = " << OuterLoopId
1065  << " due to dependence\n");
1066  ORE->emit([&]() {
1067  return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
1068  InnerLoop->getStartLoc(),
1069  InnerLoop->getHeader())
1070  << "Cannot interchange loops due to dependences.";
1071  });
1072  return false;
1073  }
1074  // Check if outer and inner loop contain legal instructions only.
1075  for (auto *BB : OuterLoop->blocks())
1076  for (Instruction &I : BB->instructionsWithoutDebug())
1077  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1078  // readnone functions do not prevent interchanging.
1079  if (CI->doesNotReadMemory())
1080  continue;
1081  LLVM_DEBUG(
1082  dbgs() << "Loops with call instructions cannot be interchanged "
1083  << "safely.");
1084  ORE->emit([&]() {
1085  return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
1086  CI->getDebugLoc(),
1087  CI->getParent())
1088  << "Cannot interchange loops due to call instruction.";
1089  });
1090 
1091  return false;
1092  }
1093 
1094  if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
1095  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
1096  ORE->emit([&]() {
1097  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
1098  InnerLoop->getStartLoc(),
1099  InnerLoop->getHeader())
1100  << "Cannot interchange loops because unsupported PHI nodes found "
1101  "in inner loop latch.";
1102  });
1103  return false;
1104  }
1105 
1106  // TODO: The loops could not be interchanged due to current limitations in the
1107  // transform module.
1108  if (currentLimitations()) {
1109  LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1110  return false;
1111  }
1112 
1113  // Check if the loops are tightly nested.
1114  if (!tightlyNested(OuterLoop, InnerLoop)) {
1115  LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1116  ORE->emit([&]() {
1117  return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1118  InnerLoop->getStartLoc(),
1119  InnerLoop->getHeader())
1120  << "Cannot interchange loops because they are not tightly "
1121  "nested.";
1122  });
1123  return false;
1124  }
1125 
1126  if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1127  OuterInnerReductions)) {
1128  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1129  ORE->emit([&]() {
1130  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1131  InnerLoop->getStartLoc(),
1132  InnerLoop->getHeader())
1133  << "Found unsupported PHI node in loop exit.";
1134  });
1135  return false;
1136  }
1137 
1138  if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1139  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1140  ORE->emit([&]() {
1141  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1142  OuterLoop->getStartLoc(),
1143  OuterLoop->getHeader())
1144  << "Found unsupported PHI node in loop exit.";
1145  });
1146  return false;
1147  }
1148 
1149  return true;
1150 }
1151 
1152 int LoopInterchangeProfitability::getInstrOrderCost() {
1153  unsigned GoodOrder, BadOrder;
1154  BadOrder = GoodOrder = 0;
1155  for (BasicBlock *BB : InnerLoop->blocks()) {
1156  for (Instruction &Ins : *BB) {
1157  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1158  unsigned NumOp = GEP->getNumOperands();
1159  bool FoundInnerInduction = false;
1160  bool FoundOuterInduction = false;
1161  for (unsigned i = 0; i < NumOp; ++i) {
1162  // Skip operands that are not SCEV-able.
1163  if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1164  continue;
1165 
1166  const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1167  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1168  if (!AR)
1169  continue;
1170 
1171  // If we find the inner induction after an outer induction e.g.
1172  // for(int i=0;i<N;i++)
1173  // for(int j=0;j<N;j++)
1174  // A[i][j] = A[i-1][j-1]+k;
1175  // then it is a good order.
1176  if (AR->getLoop() == InnerLoop) {
1177  // We found an InnerLoop induction after OuterLoop induction. It is
1178  // a good order.
1179  FoundInnerInduction = true;
1180  if (FoundOuterInduction) {
1181  GoodOrder++;
1182  break;
1183  }
1184  }
1185  // If we find the outer induction after an inner induction e.g.
1186  // for(int i=0;i<N;i++)
1187  // for(int j=0;j<N;j++)
1188  // A[j][i] = A[j-1][i-1]+k;
1189  // then it is a bad order.
1190  if (AR->getLoop() == OuterLoop) {
1191  // We found an OuterLoop induction after InnerLoop induction. It is
1192  // a bad order.
1193  FoundOuterInduction = true;
1194  if (FoundInnerInduction) {
1195  BadOrder++;
1196  break;
1197  }
1198  }
1199  }
1200  }
1201  }
1202  }
1203  return GoodOrder - BadOrder;
1204 }
1205 
1206 static bool isProfitableForVectorization(unsigned InnerLoopId,
1207  unsigned OuterLoopId,
1208  CharMatrix &DepMatrix) {
1209  // TODO: Improve this heuristic to catch more cases.
1210  // If the inner loop is loop independent or doesn't carry any dependency it is
1211  // profitable to move this to outer position.
1212  for (auto &Row : DepMatrix) {
1213  if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1214  return false;
1215  // TODO: We need to improve this heuristic.
1216  if (Row[OuterLoopId] != '=')
1217  return false;
1218  }
1219  // If outer loop has dependence and inner loop is loop independent then it is
1220  // profitable to interchange to enable parallelism.
1221  // If there are no dependences, interchanging will not improve anything.
1222  return !DepMatrix.empty();
1223 }
1224 
1225 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1226  unsigned OuterLoopId,
1227  CharMatrix &DepMatrix) {
1228  // TODO: Add better profitability checks.
1229  // e.g
1230  // 1) Construct dependency matrix and move the one with no loop carried dep
1231  // inside to enable vectorization.
1232 
1233  // This is rough cost estimation algorithm. It counts the good and bad order
1234  // of induction variables in the instruction and allows reordering if number
1235  // of bad orders is more than good.
1236  int Cost = getInstrOrderCost();
1237  LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1238  if (Cost < -LoopInterchangeCostThreshold)
1239  return true;
1240 
1241  // It is not profitable as per current cache profitability model. But check if
1242  // we can move this loop outside to improve parallelism.
1243  if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1244  return true;
1245 
1246  ORE->emit([&]() {
1247  return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1248  InnerLoop->getStartLoc(),
1249  InnerLoop->getHeader())
1250  << "Interchanging loops is too costly (cost="
1251  << ore::NV("Cost", Cost) << ", threshold="
1252  << ore::NV("Threshold", LoopInterchangeCostThreshold)
1253  << ") and it does not improve parallelism.";
1254  });
1255  return false;
1256 }
1257 
1258 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1259  Loop *InnerLoop) {
1260  for (Loop *L : *OuterLoop)
1261  if (L == InnerLoop) {
1262  OuterLoop->removeChildLoop(L);
1263  return;
1264  }
1265  llvm_unreachable("Couldn't find loop");
1266 }
1267 
1268 /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1269 /// new inner and outer loop after interchanging: NewInner is the original
1270 /// outer loop and NewOuter is the original inner loop.
1271 ///
1272 /// Before interchanging, we have the following structure
1273 /// Outer preheader
1274 // Outer header
1275 // Inner preheader
1276 // Inner header
1277 // Inner body
1278 // Inner latch
1279 // outer bbs
1280 // Outer latch
1281 //
1282 // After interchanging:
1283 // Inner preheader
1284 // Inner header
1285 // Outer preheader
1286 // Outer header
1287 // Inner body
1288 // outer bbs
1289 // Outer latch
1290 // Inner latch
1291 void LoopInterchangeTransform::restructureLoops(
1292  Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1293  BasicBlock *OrigOuterPreHeader) {
1294  Loop *OuterLoopParent = OuterLoop->getParentLoop();
1295  // The original inner loop preheader moves from the new inner loop to
1296  // the parent loop, if there is one.
1297  NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1298  LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1299 
1300  // Switch the loop levels.
1301  if (OuterLoopParent) {
1302  // Remove the loop from its parent loop.
1303  removeChildLoop(OuterLoopParent, NewInner);
1304  removeChildLoop(NewInner, NewOuter);
1305  OuterLoopParent->addChildLoop(NewOuter);
1306  } else {
1307  removeChildLoop(NewInner, NewOuter);
1308  LI->changeTopLevelLoop(NewInner, NewOuter);
1309  }
1310  while (!NewOuter->isInnermost())
1311  NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1312  NewOuter->addChildLoop(NewInner);
1313 
1314  // BBs from the original inner loop.
1315  SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1316 
1317  // Add BBs from the original outer loop to the original inner loop (excluding
1318  // BBs already in inner loop)
1319  for (BasicBlock *BB : NewInner->blocks())
1320  if (LI->getLoopFor(BB) == NewInner)
1321  NewOuter->addBlockEntry(BB);
1322 
1323  // Now remove inner loop header and latch from the new inner loop and move
1324  // other BBs (the loop body) to the new inner loop.
1325  BasicBlock *OuterHeader = NewOuter->getHeader();
1326  BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1327  for (BasicBlock *BB : OrigInnerBBs) {
1328  // Nothing will change for BBs in child loops.
1329  if (LI->getLoopFor(BB) != NewOuter)
1330  continue;
1331  // Remove the new outer loop header and latch from the new inner loop.
1332  if (BB == OuterHeader || BB == OuterLatch)
1333  NewInner->removeBlockFromLoop(BB);
1334  else
1335  LI->changeLoopFor(BB, NewInner);
1336  }
1337 
1338  // The preheader of the original outer loop becomes part of the new
1339  // outer loop.
1340  NewOuter->addBlockEntry(OrigOuterPreHeader);
1341  LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1342 
1343  // Tell SE that we move the loops around.
1344  SE->forgetLoop(NewOuter);
1345  SE->forgetLoop(NewInner);
1346 }
1347 
1349  bool Transformed = false;
1350  Instruction *InnerIndexVar;
1351 
1352  if (InnerLoop->getSubLoops().empty()) {
1353  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1354  LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1355  PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
1356  if (!InductionPHI) {
1357  LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1358  return false;
1359  }
1360 
1361  if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1362  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
1363  else
1364  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
1365 
1366  // Ensure that InductionPHI is the first Phi node.
1367  if (&InductionPHI->getParent()->front() != InductionPHI)
1368  InductionPHI->moveBefore(&InductionPHI->getParent()->front());
1369 
1370  // Create a new latch block for the inner loop. We split at the
1371  // current latch's terminator and then move the condition and all
1372  // operands that are not either loop-invariant or the induction PHI into the
1373  // new latch block.
1374  BasicBlock *NewLatch =
1375  SplitBlock(InnerLoop->getLoopLatch(),
1376  InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1377 
1379  unsigned i = 0;
1380  auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() {
1381  for (; i < WorkList.size(); i++) {
1382  // Duplicate instruction and move it the new latch. Update uses that
1383  // have been moved.
1384  Instruction *NewI = WorkList[i]->clone();
1385  NewI->insertBefore(NewLatch->getFirstNonPHI());
1386  assert(!NewI->mayHaveSideEffects() &&
1387  "Moving instructions with side-effects may change behavior of "
1388  "the loop nest!");
1389  for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1390  Instruction *UserI = cast<Instruction>(U.getUser());
1391  if (!InnerLoop->contains(UserI->getParent()) ||
1392  UserI->getParent() == NewLatch || UserI == InductionPHI)
1393  U.set(NewI);
1394  }
1395  // Add operands of moved instruction to the worklist, except if they are
1396  // outside the inner loop or are the induction PHI.
1397  for (Value *Op : WorkList[i]->operands()) {
1398  Instruction *OpI = dyn_cast<Instruction>(Op);
1399  if (!OpI ||
1400  this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1401  OpI == InductionPHI)
1402  continue;
1403  WorkList.insert(OpI);
1404  }
1405  }
1406  };
1407 
1408  // FIXME: Should we interchange when we have a constant condition?
1409  Instruction *CondI = dyn_cast<Instruction>(
1410  cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1411  ->getCondition());
1412  if (CondI)
1413  WorkList.insert(CondI);
1414  MoveInstructions();
1415  WorkList.insert(cast<Instruction>(InnerIndexVar));
1416  MoveInstructions();
1417 
1418  // Splits the inner loops phi nodes out into a separate basic block.
1419  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1420  SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1421  LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1422  }
1423 
1424  // Instructions in the original inner loop preheader may depend on values
1425  // defined in the outer loop header. Move them there, because the original
1426  // inner loop preheader will become the entry into the interchanged loop nest.
1427  // Currently we move all instructions and rely on LICM to move invariant
1428  // instructions outside the loop nest.
1429  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1430  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1431  if (InnerLoopPreHeader != OuterLoopHeader) {
1432  SmallPtrSet<Instruction *, 4> NeedsMoving;
1433  for (Instruction &I :
1434  make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1435  std::prev(InnerLoopPreHeader->end()))))
1436  I.moveBefore(OuterLoopHeader->getTerminator());
1437  }
1438 
1439  Transformed |= adjustLoopLinks();
1440  if (!Transformed) {
1441  LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1442  return false;
1443  }
1444 
1445  return true;
1446 }
1447 
1448 /// \brief Move all instructions except the terminator from FromBB right before
1449 /// InsertBefore
1450 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1451  auto &ToList = InsertBefore->getParent()->getInstList();
1452  auto &FromList = FromBB->getInstList();
1453 
1454  ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1455  FromBB->getTerminator()->getIterator());
1456 }
1457 
1458 /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1459 static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1460  // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1461  // from BB1 afterwards.
1462  auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1463  SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1464  for (Instruction *I : TempInstrs)
1465  I->removeFromParent();
1466 
1467  // Move instructions from BB2 to BB1.
1468  moveBBContents(BB2, BB1->getTerminator());
1469 
1470  // Move instructions from TempInstrs to BB2.
1471  for (Instruction *I : TempInstrs)
1472  I->insertBefore(BB2->getTerminator());
1473 }
1474 
1475 // Update BI to jump to NewBB instead of OldBB. Records updates to the
1476 // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1477 // \p OldBB is exactly once in BI's successor list.
1478 static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1479  BasicBlock *NewBB,
1480  std::vector<DominatorTree::UpdateType> &DTUpdates,
1481  bool MustUpdateOnce = true) {
1482  assert((!MustUpdateOnce ||
1484  [OldBB](BasicBlock *BB) {
1485  return BB == OldBB;
1486  }) == 1) && "BI must jump to OldBB exactly once.");
1487  bool Changed = false;
1488  for (Use &Op : BI->operands())
1489  if (Op == OldBB) {
1490  Op.set(NewBB);
1491  Changed = true;
1492  }
1493 
1494  if (Changed) {
1495  DTUpdates.push_back(
1497  DTUpdates.push_back(
1499  }
1500  assert(Changed && "Expected a successor to be updated");
1501 }
1502 
1503 // Move Lcssa PHIs to the right place.
1504 static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1505  BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1506  BasicBlock *OuterLatch, BasicBlock *OuterExit,
1507  Loop *InnerLoop, LoopInfo *LI) {
1508 
1509  // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1510  // defined either in the header or latch. Those blocks will become header and
1511  // latch of the new outer loop, and the only possible users can PHI nodes
1512  // in the exit block of the loop nest or the outer loop header (reduction
1513  // PHIs, in that case, the incoming value must be defined in the inner loop
1514  // header). We can just substitute the user with the incoming value and remove
1515  // the PHI.
1516  for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1517  assert(P.getNumIncomingValues() == 1 &&
1518  "Only loops with a single exit are supported!");
1519 
1520  // Incoming values are guaranteed be instructions currently.
1521  auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1522  // Skip phis with incoming values from the inner loop body, excluding the
1523  // header and latch.
1524  if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
1525  continue;
1526 
1527  assert(all_of(P.users(),
1528  [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1529  return (cast<PHINode>(U)->getParent() == OuterHeader &&
1530  IncI->getParent() == InnerHeader) ||
1531  cast<PHINode>(U)->getParent() == OuterExit;
1532  }) &&
1533  "Can only replace phis iff the uses are in the loop nest exit or "
1534  "the incoming value is defined in the inner header (it will "
1535  "dominate all loop blocks after interchanging)");
1536  P.replaceAllUsesWith(IncI);
1537  P.eraseFromParent();
1538  }
1539 
1540  SmallVector<PHINode *, 8> LcssaInnerExit;
1541  for (PHINode &P : InnerExit->phis())
1542  LcssaInnerExit.push_back(&P);
1543 
1544  SmallVector<PHINode *, 8> LcssaInnerLatch;
1545  for (PHINode &P : InnerLatch->phis())
1546  LcssaInnerLatch.push_back(&P);
1547 
1548  // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1549  // If a PHI node has users outside of InnerExit, it has a use outside the
1550  // interchanged loop and we have to preserve it. We move these to
1551  // InnerLatch, which will become the new exit block for the innermost
1552  // loop after interchanging.
1553  for (PHINode *P : LcssaInnerExit)
1554  P->moveBefore(InnerLatch->getFirstNonPHI());
1555 
1556  // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1557  // and we have to move them to the new inner latch.
1558  for (PHINode *P : LcssaInnerLatch)
1559  P->moveBefore(InnerExit->getFirstNonPHI());
1560 
1561  // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1562  // incoming values defined in the outer loop, we have to add a new PHI
1563  // in the inner loop latch, which became the exit block of the outer loop,
1564  // after interchanging.
1565  if (OuterExit) {
1566  for (PHINode &P : OuterExit->phis()) {
1567  if (P.getNumIncomingValues() != 1)
1568  continue;
1569  // Skip Phis with incoming values defined in the inner loop. Those should
1570  // already have been updated.
1571  auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1572  if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1573  continue;
1574 
1575  PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1576  NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1577  NewPhi->setIncomingBlock(0, OuterLatch);
1578  // We might have incoming edges from other BBs, i.e., the original outer
1579  // header.
1580  for (auto *Pred : predecessors(InnerLatch)) {
1581  if (Pred == OuterLatch)
1582  continue;
1583  NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1584  }
1585  NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1586  P.setIncomingValue(0, NewPhi);
1587  }
1588  }
1589 
1590  // Now adjust the incoming blocks for the LCSSA PHIs.
1591  // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1592  // with the new latch.
1593  InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1594 }
1595 
1596 bool LoopInterchangeTransform::adjustLoopBranches() {
1597  LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1598  std::vector<DominatorTree::UpdateType> DTUpdates;
1599 
1600  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1601  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1602 
1603  assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1604  InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1605  InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1606  // Ensure that both preheaders do not contain PHI nodes and have single
1607  // predecessors. This allows us to move them easily. We use
1608  // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1609  // preheaders do not satisfy those conditions.
1610  if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1611  !OuterLoopPreHeader->getUniquePredecessor())
1612  OuterLoopPreHeader =
1613  InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1614  if (InnerLoopPreHeader == OuterLoop->getHeader())
1615  InnerLoopPreHeader =
1616  InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1617 
1618  // Adjust the loop preheader
1619  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1620  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1621  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1622  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1623  BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1624  BasicBlock *InnerLoopLatchPredecessor =
1625  InnerLoopLatch->getUniquePredecessor();
1626  BasicBlock *InnerLoopLatchSuccessor;
1627  BasicBlock *OuterLoopLatchSuccessor;
1628 
1629  BranchInst *OuterLoopLatchBI =
1630  dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1631  BranchInst *InnerLoopLatchBI =
1632  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1633  BranchInst *OuterLoopHeaderBI =
1634  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1635  BranchInst *InnerLoopHeaderBI =
1636  dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1637 
1638  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1639  !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1640  !InnerLoopHeaderBI)
1641  return false;
1642 
1643  BranchInst *InnerLoopLatchPredecessorBI =
1644  dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1645  BranchInst *OuterLoopPredecessorBI =
1646  dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1647 
1648  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1649  return false;
1650  BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1651  if (!InnerLoopHeaderSuccessor)
1652  return false;
1653 
1654  // Adjust Loop Preheader and headers.
1655  // The branches in the outer loop predecessor and the outer loop header can
1656  // be unconditional branches or conditional branches with duplicates. Consider
1657  // this when updating the successors.
1658  updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1659  InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1660  // The outer loop header might or might not branch to the outer latch.
1661  // We are guaranteed to branch to the inner loop preheader.
1662  if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1663  // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1664  updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1665  DTUpdates,
1666  /*MustUpdateOnce=*/false);
1667  }
1668  updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1669  InnerLoopHeaderSuccessor, DTUpdates,
1670  /*MustUpdateOnce=*/false);
1671 
1672  // Adjust reduction PHI's now that the incoming block has changed.
1673  InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1674  OuterLoopHeader);
1675 
1676  updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1677  OuterLoopPreHeader, DTUpdates);
1678 
1679  // -------------Adjust loop latches-----------
1680  if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1681  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1682  else
1683  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1684 
1685  updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1686  InnerLoopLatchSuccessor, DTUpdates);
1687 
1688 
1689  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1690  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1691  else
1692  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1693 
1694  updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1695  OuterLoopLatchSuccessor, DTUpdates);
1696  updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1697  DTUpdates);
1698 
1699  DT->applyUpdates(DTUpdates);
1700  restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1701  OuterLoopPreHeader);
1702 
1703  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1704  OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1705  InnerLoop, LI);
1706  // For PHIs in the exit block of the outer loop, outer's latch has been
1707  // replaced by Inners'.
1708  OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1709 
1710  auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1711  // Now update the reduction PHIs in the inner and outer loop headers.
1712  SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1713  for (PHINode &PHI : InnerLoopHeader->phis())
1714  if (OuterInnerReductions.contains(&PHI))
1715  InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
1716  for (PHINode &PHI : OuterLoopHeader->phis())
1717  if (OuterInnerReductions.contains(&PHI))
1718  OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
1719 
1720  // Now move the remaining reduction PHIs from outer to inner loop header and
1721  // vice versa. The PHI nodes must be part of a reduction across the inner and
1722  // outer loop and all the remains to do is and updating the incoming blocks.
1723  for (PHINode *PHI : OuterLoopPHIs) {
1724  PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1725  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1726  }
1727  for (PHINode *PHI : InnerLoopPHIs) {
1728  PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1729  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1730  }
1731 
1732  // Update the incoming blocks for moved PHI nodes.
1733  OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1734  OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1735  InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1736  InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1737 
1738  // Values defined in the outer loop header could be used in the inner loop
1739  // latch. In that case, we need to create LCSSA phis for them, because after
1740  // interchanging they will be defined in the new inner loop and used in the
1741  // new outer loop.
1742  IRBuilder<> Builder(OuterLoopHeader->getContext());
1743  SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1744  for (Instruction &I :
1745  make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1746  MayNeedLCSSAPhis.push_back(&I);
1747  formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1748 
1749  return true;
1750 }
1751 
1752 bool LoopInterchangeTransform::adjustLoopLinks() {
1753  // Adjust all branches in the inner and outer loop.
1754  bool Changed = adjustLoopBranches();
1755  if (Changed) {
1756  // We have interchanged the preheaders so we need to interchange the data in
1757  // the preheaders as well. This is because the content of the inner
1758  // preheader was previously executed inside the outer loop.
1759  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1760  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1761  swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1762  }
1763  return Changed;
1764 }
1765 
1766 namespace {
1767 /// Main LoopInterchange Pass.
1768 struct LoopInterchangeLegacyPass : public LoopPass {
1769  static char ID;
1770 
1771  LoopInterchangeLegacyPass() : LoopPass(ID) {
1773  }
1774 
1775  void getAnalysisUsage(AnalysisUsage &AU) const override {
1778 
1780  }
1781 
1782  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1783  if (skipLoop(L))
1784  return false;
1785 
1786  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1787  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1788  auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1789  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1790  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1791 
1792  return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
1793  }
1794 };
1795 } // namespace
1796 
1798 
1799 INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",
1800  "Interchanges loops for cache reuse", false, false)
1804 
1805 INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",
1806  "Interchanges loops for cache reuse", false, false)
1807 
1809  return new LoopInterchangeLegacyPass();
1810 }
1811 
1813  LoopAnalysisManager &AM,
1815  LPMUpdater &U) {
1816  Function &F = *LN.getParent();
1817 
1818  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1820  if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
1821  return PreservedAnalyses::all();
1823 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::initializeLoopInterchangeLegacyPassPass
void initializeLoopInterchangeLegacyPassPass(PassRegistry &)
isLegalToInterChangeLoops
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
Definition: LoopInterchange.cpp:257
moveBBContents
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
Definition: LoopInterchange.cpp:1450
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
followLCSSA
static Value * followLCSSA(Value *SV)
Definition: LoopInterchange.cpp:723
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:730
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::LoopBase::getUniqueExitBlock
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:138
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::BranchInst::successors
iterator_range< succ_op_iterator > successors()
Definition: Instructions.h:3211
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:82
llvm::PHINode::incoming_values
op_range incoming_values()
Definition: Instructions.h:2752
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition: IVDescriptors.cpp:748
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:379
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:217
Scalar.h
llvm::DependenceAnalysisWrapperPass
Legacy pass manager pass to access dependence information.
Definition: DependenceAnalysis.h:979
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::cfg::UpdateKind::Insert
@ Insert
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::BasicBlock::instructionsWithoutDebug
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:104
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopInterchange.cpp:56
uses
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the uses
Definition: README-SSE.txt:258
llvm::SmallVector< Loop *, 8 >
Statistic.h
llvm::LoopBase::getLoopPredecessor
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
Definition: LoopInfoImpl.h:195
ErrorHandling.h
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:634
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1005
llvm::Dependence::DVEntry::GE
@ GE
Definition: DependenceAnalysis.h:95
moveLCSSAPhis
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
Definition: LoopInterchange.cpp:1504
OptimizationRemarkEmitter.h
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:84
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:149
Right
Vector Shift Left Right
Definition: README_P9.txt:118
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
isOuterMostDepPositive
static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
Definition: LoopInterchange.cpp:196
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:359
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::cfg::UpdateKind::Delete
@ Delete
interchange
loop interchange
Definition: LoopInterchange.cpp:1805
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:154
findInnerReductionPhi
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
Definition: LoopInterchange.cpp:734
llvm::SmallPtrSet< PHINode *, 4 >
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::Dependence::DVEntry::LT
@ LT
Definition: DependenceAnalysis.h:90
llvm::codeview::VFTableSlotKind::Outer
@ Outer
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::SCEVConstant::getValue
ConstantInt * getValue() const
Definition: ScalarEvolutionExpressions.h:56
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2765
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1671
loops
loops
Definition: LoopInfo.cpp:1176
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::Instruction::mayHaveSideEffects
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.cpp:689
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:306
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::AlignStyle::Left
@ Left
Instruction.h
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::LoopBase::getSubLoops
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:143
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1581
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:297
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::createLoopInterchangePass
Pass * createLoopInterchangePass()
Definition: LoopInterchange.cpp:1808
Constants.h
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2762
areInnerLoopExitPHIsSupported
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
Definition: LoopInterchange.cpp:970
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
InstrTypes.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:395
MaxLoopNestDepth
static const unsigned MaxLoopNestDepth
Definition: LoopInterchange.cpp:77
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
swapBBContents
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
Definition: LoopInterchange.cpp:1459
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
populateWorklist
static LoopVector populateWorklist(Loop &L)
Definition: LoopInterchange.cpp:273
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
false
Definition: StackSlotColoring.cpp:142
populateDependencyMatrix
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI)
Definition: LoopInterchange.cpp:89
llvm::Instruction
Definition: Instruction.h:45
isProfitableForVectorization
static bool isProfitableForVectorization(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix)
Definition: LoopInterchange.cpp:1206
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
LoopUtils.h
llvm::LPPassManager
Definition: LoopPass.h:75
LoopInterchangeCostThreshold
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_END(LoopInterchangeLegacyPass
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:216
areInnerLoopLatchPHIsSupported
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition: LoopInterchange.cpp:1033
llvm::LoopBase::removeBlockFromLoop
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
Definition: LoopInfo.h:460
Utils.h
getInductionVariable
static PHINode * getInductionVariable(Loop *L, ScalarEvolution *SE)
Definition: LoopInterchange.cpp:295
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2758
Type.h
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:701
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3182
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4111
llvm::PHINode::getIncomingValueNumForOperand
static unsigned getIncomingValueNumForOperand(unsigned i)
Definition: Instructions.h:2776
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:711
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:123
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:272
llvm::LoopPass
Definition: LoopPass.h:27
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:263
llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition: Instructions.h:2801
containsNoDependence
static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
Definition: LoopInterchange.cpp:209
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2816
llvm::LoopNest::skipEmptyBlockUntil
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
Definition: LoopNestAnalysis.cpp:289
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:967
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:928
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:593
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1646
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
validDepInterchange
static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, unsigned OuterLoopId, char InnerDep, char OuterDep)
Definition: LoopInterchange.cpp:219
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::Dependence::DVEntry::LE
@ LE
Definition: DependenceAnalysis.h:92
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
LoopNestAnalysis.h
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
LoopInterchange.h
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:856
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:404
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1588
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
this
Analysis the ScalarEvolution expression for r is this
Definition: README.txt:8
LoopPass.h
llvm::Loop::getCanonicalInductionVariable
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:150
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1678
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:194
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:276
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::LoopNest::getParent
Function * getParent() const
Return the function to which the loop-nest belongs.
Definition: LoopNestAnalysis.h:153
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12776
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
areOuterLoopExitPHIsSupported
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition: LoopInterchange.cpp:995
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: IVDescriptors.cpp:1345
llvm::BasicBlock::replacePhiUsesWith
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: BasicBlock.cpp:450
llvm::map_range
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:306
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7622
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:364
interChangeDependencies
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
Definition: LoopInterchange.cpp:188
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:462
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:324
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:151
reuse
loop Interchanges loops for cache reuse
Definition: LoopInterchange.cpp:1806
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Direction
Loop::LoopBounds::Direction Direction
Definition: LoopInfo.cpp:230
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:3968
llvm::LoopBase::addBlockEntry
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
Definition: LoopInfo.h:423
transform
instcombine should handle this transform
Definition: README.txt:262
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:250
updateSuccessor
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
Definition: LoopInterchange.cpp:1478
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::Dependence::DVEntry::GT
@ GT
Definition: DependenceAnalysis.h:93
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:109
ScalarEvolutionExpressions.h
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:73
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::DependenceInfo::depends
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Definition: DependenceAnalysis.cpp:3525
llvm::formLCSSAForInstructions
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:79
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
llvm::Dependence::DVEntry::EQ
@ EQ
Definition: DependenceAnalysis.h:91
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::ConstantInt::isNegative
bool isNegative() const
Definition: Constants.h:189
SmallVector.h
User.h
llvm::LoopInfoBase::changeTopLevelLoop
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
Definition: LoopInfo.h:1015
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2782
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::PHINode
Definition: Instructions.h:2666
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::LoopInterchangePass::run
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopInterchange.cpp:1812
llvm::SmallPtrSetImpl< PHINode * >
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1487
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
DependenceAnalysis.h
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
MaxMemInstrCount
static const unsigned MaxMemInstrCount
Definition: LoopInterchange.cpp:74
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3101
raw_ostream.h
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7494
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:835
Value.h
InitializePasses.h
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3180
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3194
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:370
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38