LLVM  13.0.0git
LoopInterchange.cpp
Go to the documentation of this file.
1 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This Pass handles loop interchange transform.
10 // This pass interchanges loops to provide a more cache-friendly memory access
11 // patterns.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DiagnosticInfo.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/IRBuilder.h"
32 #include "llvm/IR/InstrTypes.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/Type.h"
36 #include "llvm/IR/User.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/InitializePasses.h"
39 #include "llvm/Pass.h"
40 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/Debug.h"
45 #include "llvm/Transforms/Scalar.h"
46 #include "llvm/Transforms/Utils.h"
49 #include <cassert>
50 #include <utility>
51 #include <vector>
52 
53 using namespace llvm;
54 
55 #define DEBUG_TYPE "loop-interchange"
56 
57 STATISTIC(LoopsInterchanged, "Number of loops interchanged");
58 
60  "loop-interchange-threshold", cl::init(0), cl::Hidden,
61  cl::desc("Interchange if you gain more than this number"));
62 
63 namespace {
64 
65 using LoopVector = SmallVector<Loop *, 8>;
66 
67 // TODO: Check if we can use a sparse matrix here.
68 using CharMatrix = std::vector<std::vector<char>>;
69 
70 } // end anonymous namespace
71 
72 // Maximum number of dependencies that can be handled in the dependency matrix.
73 static const unsigned MaxMemInstrCount = 100;
74 
75 // Maximum loop depth supported.
76 static const unsigned MaxLoopNestDepth = 10;
77 
78 #ifdef DUMP_DEP_MATRICIES
79 static void printDepMatrix(CharMatrix &DepMatrix) {
80  for (auto &Row : DepMatrix) {
81  for (auto D : Row)
82  LLVM_DEBUG(dbgs() << D << " ");
83  LLVM_DEBUG(dbgs() << "\n");
84  }
85 }
86 #endif
87 
88 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
89  Loop *L, DependenceInfo *DI) {
90  using ValueVector = SmallVector<Value *, 16>;
91 
92  ValueVector MemInstr;
93 
94  // For each block.
95  for (BasicBlock *BB : L->blocks()) {
96  // Scan the BB and collect legal loads and stores.
97  for (Instruction &I : *BB) {
98  if (!isa<Instruction>(I))
99  return false;
100  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
101  if (!Ld->isSimple())
102  return false;
103  MemInstr.push_back(&I);
104  } else if (auto *St = dyn_cast<StoreInst>(&I)) {
105  if (!St->isSimple())
106  return false;
107  MemInstr.push_back(&I);
108  }
109  }
110  }
111 
112  LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
113  << " Loads and Stores to analyze\n");
114 
115  ValueVector::iterator I, IE, J, JE;
116 
117  for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
118  for (J = I, JE = MemInstr.end(); J != JE; ++J) {
119  std::vector<char> Dep;
120  Instruction *Src = cast<Instruction>(*I);
121  Instruction *Dst = cast<Instruction>(*J);
122  if (Src == Dst)
123  continue;
124  // Ignore Input dependencies.
125  if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
126  continue;
127  // Track Output, Flow, and Anti dependencies.
128  if (auto D = DI->depends(Src, Dst, true)) {
129  assert(D->isOrdered() && "Expected an output, flow or anti dep.");
130  LLVM_DEBUG(StringRef DepType =
131  D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
132  dbgs() << "Found " << DepType
133  << " dependency between Src and Dst\n"
134  << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
135  unsigned Levels = D->getLevels();
136  char Direction;
137  for (unsigned II = 1; II <= Levels; ++II) {
138  const SCEV *Distance = D->getDistance(II);
139  const SCEVConstant *SCEVConst =
140  dyn_cast_or_null<SCEVConstant>(Distance);
141  if (SCEVConst) {
142  const ConstantInt *CI = SCEVConst->getValue();
143  if (CI->isNegative())
144  Direction = '<';
145  else if (CI->isZero())
146  Direction = '=';
147  else
148  Direction = '>';
149  Dep.push_back(Direction);
150  } else if (D->isScalar(II)) {
151  Direction = 'S';
152  Dep.push_back(Direction);
153  } else {
154  unsigned Dir = D->getDirection(II);
155  if (Dir == Dependence::DVEntry::LT ||
157  Direction = '<';
158  else if (Dir == Dependence::DVEntry::GT ||
160  Direction = '>';
161  else if (Dir == Dependence::DVEntry::EQ)
162  Direction = '=';
163  else
164  Direction = '*';
165  Dep.push_back(Direction);
166  }
167  }
168  while (Dep.size() != Level) {
169  Dep.push_back('I');
170  }
171 
172  DepMatrix.push_back(Dep);
173  if (DepMatrix.size() > MaxMemInstrCount) {
174  LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
175  << " dependencies inside loop\n");
176  return false;
177  }
178  }
179  }
180  }
181 
182  return true;
183 }
184 
185 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
186 // matrix by exchanging the two columns.
187 static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
188  unsigned ToIndx) {
189  for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
190  std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
191 }
192 
193 // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
194 // '>'
195 static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
196  unsigned Column) {
197  for (unsigned i = 0; i <= Column; ++i) {
198  if (DepMatrix[Row][i] == '<')
199  return false;
200  if (DepMatrix[Row][i] == '>')
201  return true;
202  }
203  // All dependencies were '=','S' or 'I'
204  return false;
205 }
206 
207 // Checks if no dependence exist in the dependency matrix in Row before Column.
208 static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
209  unsigned Column) {
210  for (unsigned i = 0; i < Column; ++i) {
211  if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
212  DepMatrix[Row][i] != 'I')
213  return false;
214  }
215  return true;
216 }
217 
218 static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
219  unsigned OuterLoopId, char InnerDep,
220  char OuterDep) {
221  if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
222  return false;
223 
224  if (InnerDep == OuterDep)
225  return true;
226 
227  // It is legal to interchange if and only if after interchange no row has a
228  // '>' direction as the leftmost non-'='.
229 
230  if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
231  return true;
232 
233  if (InnerDep == '<')
234  return true;
235 
236  if (InnerDep == '>') {
237  // If OuterLoopId represents outermost loop then interchanging will make the
238  // 1st dependency as '>'
239  if (OuterLoopId == 0)
240  return false;
241 
242  // If all dependencies before OuterloopId are '=','S'or 'I'. Then
243  // interchanging will result in this row having an outermost non '='
244  // dependency of '>'
245  if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
246  return true;
247  }
248 
249  return false;
250 }
251 
252 // Checks if it is legal to interchange 2 loops.
253 // [Theorem] A permutation of the loops in a perfect nest is legal if and only
254 // if the direction matrix, after the same permutation is applied to its
255 // columns, has no ">" direction as the leftmost non-"=" direction in any row.
256 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
257  unsigned InnerLoopId,
258  unsigned OuterLoopId) {
259  unsigned NumRows = DepMatrix.size();
260  // For each row check if it is valid to interchange.
261  for (unsigned Row = 0; Row < NumRows; ++Row) {
262  char InnerDep = DepMatrix[Row][InnerLoopId];
263  char OuterDep = DepMatrix[Row][OuterLoopId];
264  if (InnerDep == '*' || OuterDep == '*')
265  return false;
266  if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
267  return false;
268  }
269  return true;
270 }
271 
272 static LoopVector populateWorklist(Loop &L) {
273  LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
274  << L.getHeader()->getParent()->getName() << " Loop: %"
275  << L.getHeader()->getName() << '\n');
276  LoopVector LoopList;
277  Loop *CurrentLoop = &L;
278  const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
279  while (!Vec->empty()) {
280  // The current loop has multiple subloops in it hence it is not tightly
281  // nested.
282  // Discard all loops above it added into Worklist.
283  if (Vec->size() != 1)
284  return {};
285 
286  LoopList.push_back(CurrentLoop);
287  CurrentLoop = Vec->front();
288  Vec = &CurrentLoop->getSubLoops();
289  }
290  LoopList.push_back(CurrentLoop);
291  return LoopList;
292 }
293 
295  PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
296  if (InnerIndexVar)
297  return InnerIndexVar;
298  if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
299  return nullptr;
300  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
301  PHINode *PhiVar = cast<PHINode>(I);
302  Type *PhiTy = PhiVar->getType();
303  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
304  !PhiTy->isPointerTy())
305  return nullptr;
306  const SCEVAddRecExpr *AddRec =
307  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
308  if (!AddRec || !AddRec->isAffine())
309  continue;
310  const SCEV *Step = AddRec->getStepRecurrence(*SE);
311  if (!isa<SCEVConstant>(Step))
312  continue;
313  // Found the induction variable.
314  // FIXME: Handle loops with more than one induction variable. Note that,
315  // currently, legality makes sure we have only one induction variable.
316  return PhiVar;
317  }
318  return nullptr;
319 }
320 
321 namespace {
322 
323 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
324 class LoopInterchangeLegality {
325 public:
326  LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
328  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
329 
330  /// Check if the loops can be interchanged.
331  bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
332  CharMatrix &DepMatrix);
333 
334  /// Check if the loop structure is understood. We do not handle triangular
335  /// loops for now.
336  bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
337 
338  bool currentLimitations();
339 
340  const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
341  return OuterInnerReductions;
342  }
343 
344 private:
345  bool tightlyNested(Loop *Outer, Loop *Inner);
346  bool containsUnsafeInstructions(BasicBlock *BB);
347 
348  /// Discover induction and reduction PHIs in the header of \p L. Induction
349  /// PHIs are added to \p Inductions, reductions are added to
350  /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
351  /// to be passed as \p InnerLoop.
352  bool findInductionAndReductions(Loop *L,
353  SmallVector<PHINode *, 8> &Inductions,
354  Loop *InnerLoop);
355 
356  Loop *OuterLoop;
357  Loop *InnerLoop;
358 
359  ScalarEvolution *SE;
360 
361  /// Interface to emit optimization remarks.
363 
364  /// Set of reduction PHIs taking part of a reduction across the inner and
365  /// outer loop.
366  SmallPtrSet<PHINode *, 4> OuterInnerReductions;
367 };
368 
369 /// LoopInterchangeProfitability checks if it is profitable to interchange the
370 /// loop.
371 class LoopInterchangeProfitability {
372 public:
373  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
375  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
376 
377  /// Check if the loop interchange is profitable.
378  bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
379  CharMatrix &DepMatrix);
380 
381 private:
382  int getInstrOrderCost();
383 
384  Loop *OuterLoop;
385  Loop *InnerLoop;
386 
387  /// Scev analysis.
388  ScalarEvolution *SE;
389 
390  /// Interface to emit optimization remarks.
392 };
393 
394 /// LoopInterchangeTransform interchanges the loop.
395 class LoopInterchangeTransform {
396 public:
397  LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
398  LoopInfo *LI, DominatorTree *DT,
399  BasicBlock *LoopNestExit,
400  const LoopInterchangeLegality &LIL)
401  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
402  LoopExit(LoopNestExit), LIL(LIL) {}
403 
404  /// Interchange OuterLoop and InnerLoop.
405  bool transform();
406  void restructureLoops(Loop *NewInner, Loop *NewOuter,
407  BasicBlock *OrigInnerPreHeader,
408  BasicBlock *OrigOuterPreHeader);
409  void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
410 
411 private:
412  bool adjustLoopLinks();
413  bool adjustLoopBranches();
414 
415  Loop *OuterLoop;
416  Loop *InnerLoop;
417 
418  /// Scev analysis.
419  ScalarEvolution *SE;
420 
421  LoopInfo *LI;
422  DominatorTree *DT;
423  BasicBlock *LoopExit;
424 
425  const LoopInterchangeLegality &LIL;
426 };
427 
428 struct LoopInterchange {
429  ScalarEvolution *SE = nullptr;
430  LoopInfo *LI = nullptr;
431  DependenceInfo *DI = nullptr;
432  DominatorTree *DT = nullptr;
433 
434  /// Interface to emit optimization remarks.
436 
437  LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
439  : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
440 
441  bool run(Loop *L) {
442  if (L->getParentLoop())
443  return false;
444 
445  return processLoopList(populateWorklist(*L));
446  }
447 
448  bool run(LoopNest &LN) {
449  const auto &LoopList = LN.getLoops();
450  for (unsigned I = 1; I < LoopList.size(); ++I)
451  if (LoopList[I]->getParentLoop() != LoopList[I - 1])
452  return false;
453  return processLoopList(LoopList);
454  }
455 
456  bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
457  for (Loop *L : LoopList) {
458  const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
459  if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
460  LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
461  return false;
462  }
463  if (L->getNumBackEdges() != 1) {
464  LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
465  return false;
466  }
467  if (!L->getExitingBlock()) {
468  LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
469  return false;
470  }
471  }
472  return true;
473  }
474 
475  unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
476  // TODO: Add a better heuristic to select the loop to be interchanged based
477  // on the dependence matrix. Currently we select the innermost loop.
478  return LoopList.size() - 1;
479  }
480 
481  bool processLoopList(ArrayRef<Loop *> LoopList) {
482  bool Changed = false;
483  unsigned LoopNestDepth = LoopList.size();
484  if (LoopNestDepth < 2) {
485  LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
486  return false;
487  }
488  if (LoopNestDepth > MaxLoopNestDepth) {
489  LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
490  << MaxLoopNestDepth << "\n");
491  return false;
492  }
493  if (!isComputableLoopNest(LoopList)) {
494  LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
495  return false;
496  }
497 
498  LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
499  << "\n");
500 
501  CharMatrix DependencyMatrix;
502  Loop *OuterMostLoop = *(LoopList.begin());
503  if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
504  OuterMostLoop, DI)) {
505  LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
506  return false;
507  }
508 #ifdef DUMP_DEP_MATRICIES
509  LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
510  printDepMatrix(DependencyMatrix);
511 #endif
512 
513  // Get the Outermost loop exit.
514  BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
515  if (!LoopNestExit) {
516  LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
517  return false;
518  }
519 
520  unsigned SelecLoopId = selectLoopForInterchange(LoopList);
521  // Move the selected loop outwards to the best possible position.
522  Loop *LoopToBeInterchanged = LoopList[SelecLoopId];
523  for (unsigned i = SelecLoopId; i > 0; i--) {
524  bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i,
525  i - 1, LoopNestExit, DependencyMatrix);
526  if (!Interchanged)
527  return Changed;
528  // Update the DependencyMatrix
529  interChangeDependencies(DependencyMatrix, i, i - 1);
530 #ifdef DUMP_DEP_MATRICIES
531  LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
532  printDepMatrix(DependencyMatrix);
533 #endif
534  Changed |= Interchanged;
535  }
536  return Changed;
537  }
538 
539  bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
540  unsigned OuterLoopId, BasicBlock *LoopNestExit,
541  std::vector<std::vector<char>> &DependencyMatrix) {
542  LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
543  << " and OuterLoopId = " << OuterLoopId << "\n");
544  LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
545  if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
546  LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
547  return false;
548  }
549  LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
550  LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
551  if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
552  LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
553  return false;
554  }
555 
556  ORE->emit([&]() {
557  return OptimizationRemark(DEBUG_TYPE, "Interchanged",
558  InnerLoop->getStartLoc(),
559  InnerLoop->getHeader())
560  << "Loop interchanged with enclosing loop.";
561  });
562 
563  LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit,
564  LIL);
565  LIT.transform();
566  LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
567  LoopsInterchanged++;
568 
569  assert(InnerLoop->isLCSSAForm(*DT) &&
570  "Inner loop not left in LCSSA form after loop interchange!");
571  assert(OuterLoop->isLCSSAForm(*DT) &&
572  "Outer loop not left in LCSSA form after loop interchange!");
573 
574  return true;
575  }
576 };
577 
578 } // end anonymous namespace
579 
580 bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
581  return any_of(*BB, [](const Instruction &I) {
582  return I.mayHaveSideEffects() || I.mayReadFromMemory();
583  });
584 }
585 
586 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
587  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
588  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
589  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
590 
591  LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
592 
593  // A perfectly nested loop will not have any branch in between the outer and
594  // inner block i.e. outer header will branch to either inner preheader and
595  // outerloop latch.
596  BranchInst *OuterLoopHeaderBI =
597  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
598  if (!OuterLoopHeaderBI)
599  return false;
600 
601  for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
602  if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
603  Succ != OuterLoopLatch)
604  return false;
605 
606  LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
607  // We do not have any basic block in between now make sure the outer header
608  // and outer loop latch doesn't contain any unsafe instructions.
609  if (containsUnsafeInstructions(OuterLoopHeader) ||
610  containsUnsafeInstructions(OuterLoopLatch))
611  return false;
612 
613  // Also make sure the inner loop preheader does not contain any unsafe
614  // instructions. Note that all instructions in the preheader will be moved to
615  // the outer loop header when interchanging.
616  if (InnerLoopPreHeader != OuterLoopHeader &&
617  containsUnsafeInstructions(InnerLoopPreHeader))
618  return false;
619 
620  LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
621  // We have a perfect loop nest.
622  return true;
623 }
624 
625 bool LoopInterchangeLegality::isLoopStructureUnderstood(
626  PHINode *InnerInduction) {
627  unsigned Num = InnerInduction->getNumOperands();
628  BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
629  for (unsigned i = 0; i < Num; ++i) {
630  Value *Val = InnerInduction->getOperand(i);
631  if (isa<Constant>(Val))
632  continue;
633  Instruction *I = dyn_cast<Instruction>(Val);
634  if (!I)
635  return false;
636  // TODO: Handle triangular loops.
637  // e.g. for(int i=0;i<N;i++)
638  // for(int j=i;j<N;j++)
639  unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
640  if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
641  InnerLoopPreheader &&
642  !OuterLoop->isLoopInvariant(I)) {
643  return false;
644  }
645  }
646  return true;
647 }
648 
649 // If SV is a LCSSA PHI node with a single incoming value, return the incoming
650 // value.
651 static Value *followLCSSA(Value *SV) {
652  PHINode *PHI = dyn_cast<PHINode>(SV);
653  if (!PHI)
654  return SV;
655 
656  if (PHI->getNumIncomingValues() != 1)
657  return SV;
658  return followLCSSA(PHI->getIncomingValue(0));
659 }
660 
661 // Check V's users to see if it is involved in a reduction in L.
663  // Reduction variables cannot be constants.
664  if (isa<Constant>(V))
665  return nullptr;
666 
667  for (Value *User : V->users()) {
668  if (PHINode *PHI = dyn_cast<PHINode>(User)) {
669  if (PHI->getNumIncomingValues() == 1)
670  continue;
672  if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
673  return PHI;
674  return nullptr;
675  }
676  }
677 
678  return nullptr;
679 }
680 
681 bool LoopInterchangeLegality::findInductionAndReductions(
682  Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
683  if (!L->getLoopLatch() || !L->getLoopPredecessor())
684  return false;
685  for (PHINode &PHI : L->getHeader()->phis()) {
688  if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
689  Inductions.push_back(&PHI);
690  else {
691  // PHIs in inner loops need to be part of a reduction in the outer loop,
692  // discovered when checking the PHIs of the outer loop earlier.
693  if (!InnerLoop) {
694  if (!OuterInnerReductions.count(&PHI)) {
695  LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
696  "across the outer loop.\n");
697  return false;
698  }
699  } else {
700  assert(PHI.getNumIncomingValues() == 2 &&
701  "Phis in loop header should have exactly 2 incoming values");
702  // Check if we have a PHI node in the outer loop that has a reduction
703  // result from the inner loop as an incoming value.
704  Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
705  PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
706  if (!InnerRedPhi ||
707  !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
708  LLVM_DEBUG(
709  dbgs()
710  << "Failed to recognize PHI as an induction or reduction.\n");
711  return false;
712  }
713  OuterInnerReductions.insert(&PHI);
714  OuterInnerReductions.insert(InnerRedPhi);
715  }
716  }
717  }
718  return true;
719 }
720 
721 // This function indicates the current limitations in the transform as a result
722 // of which we do not proceed.
723 bool LoopInterchangeLegality::currentLimitations() {
724  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
725  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
726 
727  // transform currently expects the loop latches to also be the exiting
728  // blocks.
729  if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
730  OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
731  !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
732  !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
733  LLVM_DEBUG(
734  dbgs() << "Loops where the latch is not the exiting block are not"
735  << " supported currently.\n");
736  ORE->emit([&]() {
737  return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
738  OuterLoop->getStartLoc(),
739  OuterLoop->getHeader())
740  << "Loops where the latch is not the exiting block cannot be"
741  " interchange currently.";
742  });
743  return true;
744  }
745 
746  PHINode *InnerInductionVar;
747  SmallVector<PHINode *, 8> Inductions;
748  if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
749  LLVM_DEBUG(
750  dbgs() << "Only outer loops with induction or reduction PHI nodes "
751  << "are supported currently.\n");
752  ORE->emit([&]() {
753  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
754  OuterLoop->getStartLoc(),
755  OuterLoop->getHeader())
756  << "Only outer loops with induction or reduction PHI nodes can be"
757  " interchanged currently.";
758  });
759  return true;
760  }
761 
762  // TODO: Currently we handle only loops with 1 induction variable.
763  if (Inductions.size() != 1) {
764  LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
765  << "supported currently.\n");
766  ORE->emit([&]() {
767  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
768  OuterLoop->getStartLoc(),
769  OuterLoop->getHeader())
770  << "Only outer loops with 1 induction variable can be "
771  "interchanged currently.";
772  });
773  return true;
774  }
775 
776  Inductions.clear();
777  if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
778  LLVM_DEBUG(
779  dbgs() << "Only inner loops with induction or reduction PHI nodes "
780  << "are supported currently.\n");
781  ORE->emit([&]() {
782  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
783  InnerLoop->getStartLoc(),
784  InnerLoop->getHeader())
785  << "Only inner loops with induction or reduction PHI nodes can be"
786  " interchange currently.";
787  });
788  return true;
789  }
790 
791  // TODO: Currently we handle only loops with 1 induction variable.
792  if (Inductions.size() != 1) {
793  LLVM_DEBUG(
794  dbgs() << "We currently only support loops with 1 induction variable."
795  << "Failed to interchange due to current limitation\n");
796  ORE->emit([&]() {
797  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
798  InnerLoop->getStartLoc(),
799  InnerLoop->getHeader())
800  << "Only inner loops with 1 induction variable can be "
801  "interchanged currently.";
802  });
803  return true;
804  }
805  InnerInductionVar = Inductions.pop_back_val();
806 
807  // TODO: Triangular loops are not handled for now.
808  if (!isLoopStructureUnderstood(InnerInductionVar)) {
809  LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
810  ORE->emit([&]() {
811  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
812  InnerLoop->getStartLoc(),
813  InnerLoop->getHeader())
814  << "Inner loop structure not understood currently.";
815  });
816  return true;
817  }
818 
819  // TODO: Current limitation: Since we split the inner loop latch at the point
820  // were induction variable is incremented (induction.next); We cannot have
821  // more than 1 user of induction.next since it would result in broken code
822  // after split.
823  // e.g.
824  // for(i=0;i<N;i++) {
825  // for(j = 0;j<M;j++) {
826  // A[j+1][i+2] = A[j][i]+k;
827  // }
828  // }
829  Instruction *InnerIndexVarInc = nullptr;
830  if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
831  InnerIndexVarInc =
832  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
833  else
834  InnerIndexVarInc =
835  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
836 
837  if (!InnerIndexVarInc) {
838  LLVM_DEBUG(
839  dbgs() << "Did not find an instruction to increment the induction "
840  << "variable.\n");
841  ORE->emit([&]() {
842  return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner",
843  InnerLoop->getStartLoc(),
844  InnerLoop->getHeader())
845  << "The inner loop does not increment the induction variable.";
846  });
847  return true;
848  }
849 
850  // Since we split the inner loop latch on this induction variable. Make sure
851  // we do not have any instruction between the induction variable and branch
852  // instruction.
853 
854  bool FoundInduction = false;
855  for (const Instruction &I :
856  llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) {
857  if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) ||
858  isa<ZExtInst>(I))
859  continue;
860 
861  // We found an instruction. If this is not induction variable then it is not
862  // safe to split this loop latch.
863  if (!I.isIdenticalTo(InnerIndexVarInc)) {
864  LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction "
865  << "variable increment and branch.\n");
866  ORE->emit([&]() {
868  DEBUG_TYPE, "UnsupportedInsBetweenInduction",
869  InnerLoop->getStartLoc(), InnerLoop->getHeader())
870  << "Found unsupported instruction between induction variable "
871  "increment and branch.";
872  });
873  return true;
874  }
875 
876  FoundInduction = true;
877  break;
878  }
879  // The loop latch ended and we didn't find the induction variable return as
880  // current limitation.
881  if (!FoundInduction) {
882  LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n");
883  ORE->emit([&]() {
884  return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable",
885  InnerLoop->getStartLoc(),
886  InnerLoop->getHeader())
887  << "Did not find the induction variable.";
888  });
889  return true;
890  }
891  return false;
892 }
893 
894 // We currently only support LCSSA PHI nodes in the inner loop exit, if their
895 // users are either reduction PHIs or PHIs outside the outer loop (which means
896 // the we are only interested in the final value after the loop).
897 static bool
899  SmallPtrSetImpl<PHINode *> &Reductions) {
900  BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
901  for (PHINode &PHI : InnerExit->phis()) {
902  // Reduction lcssa phi will have only 1 incoming block that from loop latch.
903  if (PHI.getNumIncomingValues() > 1)
904  return false;
905  if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
906  PHINode *PN = dyn_cast<PHINode>(U);
907  return !PN ||
908  (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
909  })) {
910  return false;
911  }
912  }
913  return true;
914 }
915 
916 // We currently support LCSSA PHI nodes in the outer loop exit, if their
917 // incoming values do not come from the outer loop latch or if the
918 // outer loop latch has a single predecessor. In that case, the value will
919 // be available if both the inner and outer loop conditions are true, which
920 // will still be true after interchanging. If we have multiple predecessor,
921 // that may not be the case, e.g. because the outer loop latch may be executed
922 // if the inner loop is not executed.
923 static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
924  BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
925  for (PHINode &PHI : LoopNestExit->phis()) {
926  // FIXME: We currently are not able to detect floating point reductions
927  // and have to use floating point PHIs as a proxy to prevent
928  // interchanging in the presence of floating point reductions.
929  if (PHI.getType()->isFloatingPointTy())
930  return false;
931  for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
932  Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
933  if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
934  continue;
935 
936  // The incoming value is defined in the outer loop latch. Currently we
937  // only support that in case the outer loop latch has a single predecessor.
938  // This guarantees that the outer loop latch is executed if and only if
939  // the inner loop is executed (because tightlyNested() guarantees that the
940  // outer loop header only branches to the inner loop or the outer loop
941  // latch).
942  // FIXME: We could weaken this logic and allow multiple predecessors,
943  // if the values are produced outside the loop latch. We would need
944  // additional logic to update the PHI nodes in the exit block as
945  // well.
946  if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
947  return false;
948  }
949  }
950  return true;
951 }
952 
953 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
954  unsigned OuterLoopId,
955  CharMatrix &DepMatrix) {
956  if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
957  LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
958  << " and OuterLoopId = " << OuterLoopId
959  << " due to dependence\n");
960  ORE->emit([&]() {
961  return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
962  InnerLoop->getStartLoc(),
963  InnerLoop->getHeader())
964  << "Cannot interchange loops due to dependences.";
965  });
966  return false;
967  }
968  // Check if outer and inner loop contain legal instructions only.
969  for (auto *BB : OuterLoop->blocks())
970  for (Instruction &I : BB->instructionsWithoutDebug())
971  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
972  // readnone functions do not prevent interchanging.
973  if (CI->doesNotReadMemory())
974  continue;
975  LLVM_DEBUG(
976  dbgs() << "Loops with call instructions cannot be interchanged "
977  << "safely.");
978  ORE->emit([&]() {
979  return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
980  CI->getDebugLoc(),
981  CI->getParent())
982  << "Cannot interchange loops due to call instruction.";
983  });
984 
985  return false;
986  }
987 
988  // TODO: The loops could not be interchanged due to current limitations in the
989  // transform module.
990  if (currentLimitations()) {
991  LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
992  return false;
993  }
994 
995  // Check if the loops are tightly nested.
996  if (!tightlyNested(OuterLoop, InnerLoop)) {
997  LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
998  ORE->emit([&]() {
999  return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1000  InnerLoop->getStartLoc(),
1001  InnerLoop->getHeader())
1002  << "Cannot interchange loops because they are not tightly "
1003  "nested.";
1004  });
1005  return false;
1006  }
1007 
1008  if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1009  OuterInnerReductions)) {
1010  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1011  ORE->emit([&]() {
1012  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1013  InnerLoop->getStartLoc(),
1014  InnerLoop->getHeader())
1015  << "Found unsupported PHI node in loop exit.";
1016  });
1017  return false;
1018  }
1019 
1020  if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1021  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1022  ORE->emit([&]() {
1023  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1024  OuterLoop->getStartLoc(),
1025  OuterLoop->getHeader())
1026  << "Found unsupported PHI node in loop exit.";
1027  });
1028  return false;
1029  }
1030 
1031  return true;
1032 }
1033 
1034 int LoopInterchangeProfitability::getInstrOrderCost() {
1035  unsigned GoodOrder, BadOrder;
1036  BadOrder = GoodOrder = 0;
1037  for (BasicBlock *BB : InnerLoop->blocks()) {
1038  for (Instruction &Ins : *BB) {
1039  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1040  unsigned NumOp = GEP->getNumOperands();
1041  bool FoundInnerInduction = false;
1042  bool FoundOuterInduction = false;
1043  for (unsigned i = 0; i < NumOp; ++i) {
1044  // Skip operands that are not SCEV-able.
1045  if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1046  continue;
1047 
1048  const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1049  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1050  if (!AR)
1051  continue;
1052 
1053  // If we find the inner induction after an outer induction e.g.
1054  // for(int i=0;i<N;i++)
1055  // for(int j=0;j<N;j++)
1056  // A[i][j] = A[i-1][j-1]+k;
1057  // then it is a good order.
1058  if (AR->getLoop() == InnerLoop) {
1059  // We found an InnerLoop induction after OuterLoop induction. It is
1060  // a good order.
1061  FoundInnerInduction = true;
1062  if (FoundOuterInduction) {
1063  GoodOrder++;
1064  break;
1065  }
1066  }
1067  // If we find the outer induction after an inner induction e.g.
1068  // for(int i=0;i<N;i++)
1069  // for(int j=0;j<N;j++)
1070  // A[j][i] = A[j-1][i-1]+k;
1071  // then it is a bad order.
1072  if (AR->getLoop() == OuterLoop) {
1073  // We found an OuterLoop induction after InnerLoop induction. It is
1074  // a bad order.
1075  FoundOuterInduction = true;
1076  if (FoundInnerInduction) {
1077  BadOrder++;
1078  break;
1079  }
1080  }
1081  }
1082  }
1083  }
1084  }
1085  return GoodOrder - BadOrder;
1086 }
1087 
1088 static bool isProfitableForVectorization(unsigned InnerLoopId,
1089  unsigned OuterLoopId,
1090  CharMatrix &DepMatrix) {
1091  // TODO: Improve this heuristic to catch more cases.
1092  // If the inner loop is loop independent or doesn't carry any dependency it is
1093  // profitable to move this to outer position.
1094  for (auto &Row : DepMatrix) {
1095  if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1096  return false;
1097  // TODO: We need to improve this heuristic.
1098  if (Row[OuterLoopId] != '=')
1099  return false;
1100  }
1101  // If outer loop has dependence and inner loop is loop independent then it is
1102  // profitable to interchange to enable parallelism.
1103  // If there are no dependences, interchanging will not improve anything.
1104  return !DepMatrix.empty();
1105 }
1106 
1107 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1108  unsigned OuterLoopId,
1109  CharMatrix &DepMatrix) {
1110  // TODO: Add better profitability checks.
1111  // e.g
1112  // 1) Construct dependency matrix and move the one with no loop carried dep
1113  // inside to enable vectorization.
1114 
1115  // This is rough cost estimation algorithm. It counts the good and bad order
1116  // of induction variables in the instruction and allows reordering if number
1117  // of bad orders is more than good.
1118  int Cost = getInstrOrderCost();
1119  LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1120  if (Cost < -LoopInterchangeCostThreshold)
1121  return true;
1122 
1123  // It is not profitable as per current cache profitability model. But check if
1124  // we can move this loop outside to improve parallelism.
1125  if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1126  return true;
1127 
1128  ORE->emit([&]() {
1129  return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1130  InnerLoop->getStartLoc(),
1131  InnerLoop->getHeader())
1132  << "Interchanging loops is too costly (cost="
1133  << ore::NV("Cost", Cost) << ", threshold="
1134  << ore::NV("Threshold", LoopInterchangeCostThreshold)
1135  << ") and it does not improve parallelism.";
1136  });
1137  return false;
1138 }
1139 
1140 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1141  Loop *InnerLoop) {
1142  for (Loop *L : *OuterLoop)
1143  if (L == InnerLoop) {
1144  OuterLoop->removeChildLoop(L);
1145  return;
1146  }
1147  llvm_unreachable("Couldn't find loop");
1148 }
1149 
1150 /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1151 /// new inner and outer loop after interchanging: NewInner is the original
1152 /// outer loop and NewOuter is the original inner loop.
1153 ///
1154 /// Before interchanging, we have the following structure
1155 /// Outer preheader
1156 // Outer header
1157 // Inner preheader
1158 // Inner header
1159 // Inner body
1160 // Inner latch
1161 // outer bbs
1162 // Outer latch
1163 //
1164 // After interchanging:
1165 // Inner preheader
1166 // Inner header
1167 // Outer preheader
1168 // Outer header
1169 // Inner body
1170 // outer bbs
1171 // Outer latch
1172 // Inner latch
1173 void LoopInterchangeTransform::restructureLoops(
1174  Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1175  BasicBlock *OrigOuterPreHeader) {
1176  Loop *OuterLoopParent = OuterLoop->getParentLoop();
1177  // The original inner loop preheader moves from the new inner loop to
1178  // the parent loop, if there is one.
1179  NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1180  LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1181 
1182  // Switch the loop levels.
1183  if (OuterLoopParent) {
1184  // Remove the loop from its parent loop.
1185  removeChildLoop(OuterLoopParent, NewInner);
1186  removeChildLoop(NewInner, NewOuter);
1187  OuterLoopParent->addChildLoop(NewOuter);
1188  } else {
1189  removeChildLoop(NewInner, NewOuter);
1190  LI->changeTopLevelLoop(NewInner, NewOuter);
1191  }
1192  while (!NewOuter->isInnermost())
1193  NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1194  NewOuter->addChildLoop(NewInner);
1195 
1196  // BBs from the original inner loop.
1197  SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1198 
1199  // Add BBs from the original outer loop to the original inner loop (excluding
1200  // BBs already in inner loop)
1201  for (BasicBlock *BB : NewInner->blocks())
1202  if (LI->getLoopFor(BB) == NewInner)
1203  NewOuter->addBlockEntry(BB);
1204 
1205  // Now remove inner loop header and latch from the new inner loop and move
1206  // other BBs (the loop body) to the new inner loop.
1207  BasicBlock *OuterHeader = NewOuter->getHeader();
1208  BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1209  for (BasicBlock *BB : OrigInnerBBs) {
1210  // Nothing will change for BBs in child loops.
1211  if (LI->getLoopFor(BB) != NewOuter)
1212  continue;
1213  // Remove the new outer loop header and latch from the new inner loop.
1214  if (BB == OuterHeader || BB == OuterLatch)
1215  NewInner->removeBlockFromLoop(BB);
1216  else
1217  LI->changeLoopFor(BB, NewInner);
1218  }
1219 
1220  // The preheader of the original outer loop becomes part of the new
1221  // outer loop.
1222  NewOuter->addBlockEntry(OrigOuterPreHeader);
1223  LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1224 
1225  // Tell SE that we move the loops around.
1226  SE->forgetLoop(NewOuter);
1227  SE->forgetLoop(NewInner);
1228 }
1229 
1231  bool Transformed = false;
1232  Instruction *InnerIndexVar;
1233 
1234  if (InnerLoop->getSubLoops().empty()) {
1235  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1236  LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1237  PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
1238  if (!InductionPHI) {
1239  LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1240  return false;
1241  }
1242 
1243  if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1244  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
1245  else
1246  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
1247 
1248  // Ensure that InductionPHI is the first Phi node.
1249  if (&InductionPHI->getParent()->front() != InductionPHI)
1250  InductionPHI->moveBefore(&InductionPHI->getParent()->front());
1251 
1252  // Create a new latch block for the inner loop. We split at the
1253  // current latch's terminator and then move the condition and all
1254  // operands that are not either loop-invariant or the induction PHI into the
1255  // new latch block.
1256  BasicBlock *NewLatch =
1257  SplitBlock(InnerLoop->getLoopLatch(),
1258  InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1259 
1261  unsigned i = 0;
1262  auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() {
1263  for (; i < WorkList.size(); i++) {
1264  // Duplicate instruction and move it the new latch. Update uses that
1265  // have been moved.
1266  Instruction *NewI = WorkList[i]->clone();
1267  NewI->insertBefore(NewLatch->getFirstNonPHI());
1268  assert(!NewI->mayHaveSideEffects() &&
1269  "Moving instructions with side-effects may change behavior of "
1270  "the loop nest!");
1271  for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1272  Instruction *UserI = cast<Instruction>(U.getUser());
1273  if (!InnerLoop->contains(UserI->getParent()) ||
1274  UserI->getParent() == NewLatch || UserI == InductionPHI)
1275  U.set(NewI);
1276  }
1277  // Add operands of moved instruction to the worklist, except if they are
1278  // outside the inner loop or are the induction PHI.
1279  for (Value *Op : WorkList[i]->operands()) {
1280  Instruction *OpI = dyn_cast<Instruction>(Op);
1281  if (!OpI ||
1282  this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1283  OpI == InductionPHI)
1284  continue;
1285  WorkList.insert(OpI);
1286  }
1287  }
1288  };
1289 
1290  // FIXME: Should we interchange when we have a constant condition?
1291  Instruction *CondI = dyn_cast<Instruction>(
1292  cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1293  ->getCondition());
1294  if (CondI)
1295  WorkList.insert(CondI);
1296  MoveInstructions();
1297  WorkList.insert(cast<Instruction>(InnerIndexVar));
1298  MoveInstructions();
1299 
1300  // Splits the inner loops phi nodes out into a separate basic block.
1301  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1302  SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1303  LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1304  }
1305 
1306  // Instructions in the original inner loop preheader may depend on values
1307  // defined in the outer loop header. Move them there, because the original
1308  // inner loop preheader will become the entry into the interchanged loop nest.
1309  // Currently we move all instructions and rely on LICM to move invariant
1310  // instructions outside the loop nest.
1311  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1312  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1313  if (InnerLoopPreHeader != OuterLoopHeader) {
1314  SmallPtrSet<Instruction *, 4> NeedsMoving;
1315  for (Instruction &I :
1316  make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1317  std::prev(InnerLoopPreHeader->end()))))
1318  I.moveBefore(OuterLoopHeader->getTerminator());
1319  }
1320 
1321  Transformed |= adjustLoopLinks();
1322  if (!Transformed) {
1323  LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1324  return false;
1325  }
1326 
1327  return true;
1328 }
1329 
1330 /// \brief Move all instructions except the terminator from FromBB right before
1331 /// InsertBefore
1332 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1333  auto &ToList = InsertBefore->getParent()->getInstList();
1334  auto &FromList = FromBB->getInstList();
1335 
1336  ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1337  FromBB->getTerminator()->getIterator());
1338 }
1339 
1340 /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1341 static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1342  // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1343  // from BB1 afterwards.
1344  auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1345  SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1346  for (Instruction *I : TempInstrs)
1347  I->removeFromParent();
1348 
1349  // Move instructions from BB2 to BB1.
1350  moveBBContents(BB2, BB1->getTerminator());
1351 
1352  // Move instructions from TempInstrs to BB2.
1353  for (Instruction *I : TempInstrs)
1354  I->insertBefore(BB2->getTerminator());
1355 }
1356 
1357 // Update BI to jump to NewBB instead of OldBB. Records updates to the
1358 // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1359 // \p OldBB is exactly once in BI's successor list.
1360 static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1361  BasicBlock *NewBB,
1362  std::vector<DominatorTree::UpdateType> &DTUpdates,
1363  bool MustUpdateOnce = true) {
1364  assert((!MustUpdateOnce ||
1366  [OldBB](BasicBlock *BB) {
1367  return BB == OldBB;
1368  }) == 1) && "BI must jump to OldBB exactly once.");
1369  bool Changed = false;
1370  for (Use &Op : BI->operands())
1371  if (Op == OldBB) {
1372  Op.set(NewBB);
1373  Changed = true;
1374  }
1375 
1376  if (Changed) {
1377  DTUpdates.push_back(
1379  DTUpdates.push_back(
1381  }
1382  assert(Changed && "Expected a successor to be updated");
1383 }
1384 
1385 // Move Lcssa PHIs to the right place.
1386 static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1387  BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1388  BasicBlock *OuterLatch, BasicBlock *OuterExit,
1389  Loop *InnerLoop, LoopInfo *LI) {
1390 
1391  // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1392  // defined either in the header or latch. Those blocks will become header and
1393  // latch of the new outer loop, and the only possible users can PHI nodes
1394  // in the exit block of the loop nest or the outer loop header (reduction
1395  // PHIs, in that case, the incoming value must be defined in the inner loop
1396  // header). We can just substitute the user with the incoming value and remove
1397  // the PHI.
1398  for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1399  assert(P.getNumIncomingValues() == 1 &&
1400  "Only loops with a single exit are supported!");
1401 
1402  // Incoming values are guaranteed be instructions currently.
1403  auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1404  // Skip phis with incoming values from the inner loop body, excluding the
1405  // header and latch.
1406  if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
1407  continue;
1408 
1409  assert(all_of(P.users(),
1410  [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1411  return (cast<PHINode>(U)->getParent() == OuterHeader &&
1412  IncI->getParent() == InnerHeader) ||
1413  cast<PHINode>(U)->getParent() == OuterExit;
1414  }) &&
1415  "Can only replace phis iff the uses are in the loop nest exit or "
1416  "the incoming value is defined in the inner header (it will "
1417  "dominate all loop blocks after interchanging)");
1418  P.replaceAllUsesWith(IncI);
1419  P.eraseFromParent();
1420  }
1421 
1422  SmallVector<PHINode *, 8> LcssaInnerExit;
1423  for (PHINode &P : InnerExit->phis())
1424  LcssaInnerExit.push_back(&P);
1425 
1426  SmallVector<PHINode *, 8> LcssaInnerLatch;
1427  for (PHINode &P : InnerLatch->phis())
1428  LcssaInnerLatch.push_back(&P);
1429 
1430  // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1431  // If a PHI node has users outside of InnerExit, it has a use outside the
1432  // interchanged loop and we have to preserve it. We move these to
1433  // InnerLatch, which will become the new exit block for the innermost
1434  // loop after interchanging.
1435  for (PHINode *P : LcssaInnerExit)
1436  P->moveBefore(InnerLatch->getFirstNonPHI());
1437 
1438  // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1439  // and we have to move them to the new inner latch.
1440  for (PHINode *P : LcssaInnerLatch)
1441  P->moveBefore(InnerExit->getFirstNonPHI());
1442 
1443  // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1444  // incoming values defined in the outer loop, we have to add a new PHI
1445  // in the inner loop latch, which became the exit block of the outer loop,
1446  // after interchanging.
1447  if (OuterExit) {
1448  for (PHINode &P : OuterExit->phis()) {
1449  if (P.getNumIncomingValues() != 1)
1450  continue;
1451  // Skip Phis with incoming values defined in the inner loop. Those should
1452  // already have been updated.
1453  auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1454  if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1455  continue;
1456 
1457  PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1458  NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1459  NewPhi->setIncomingBlock(0, OuterLatch);
1460  NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1461  P.setIncomingValue(0, NewPhi);
1462  }
1463  }
1464 
1465  // Now adjust the incoming blocks for the LCSSA PHIs.
1466  // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1467  // with the new latch.
1468  InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1469 }
1470 
1471 bool LoopInterchangeTransform::adjustLoopBranches() {
1472  LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1473  std::vector<DominatorTree::UpdateType> DTUpdates;
1474 
1475  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1476  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1477 
1478  assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1479  InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1480  InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1481  // Ensure that both preheaders do not contain PHI nodes and have single
1482  // predecessors. This allows us to move them easily. We use
1483  // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1484  // preheaders do not satisfy those conditions.
1485  if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1486  !OuterLoopPreHeader->getUniquePredecessor())
1487  OuterLoopPreHeader =
1488  InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1489  if (InnerLoopPreHeader == OuterLoop->getHeader())
1490  InnerLoopPreHeader =
1491  InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1492 
1493  // Adjust the loop preheader
1494  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1495  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1496  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1497  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1498  BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1499  BasicBlock *InnerLoopLatchPredecessor =
1500  InnerLoopLatch->getUniquePredecessor();
1501  BasicBlock *InnerLoopLatchSuccessor;
1502  BasicBlock *OuterLoopLatchSuccessor;
1503 
1504  BranchInst *OuterLoopLatchBI =
1505  dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1506  BranchInst *InnerLoopLatchBI =
1507  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1508  BranchInst *OuterLoopHeaderBI =
1509  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1510  BranchInst *InnerLoopHeaderBI =
1511  dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1512 
1513  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1514  !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1515  !InnerLoopHeaderBI)
1516  return false;
1517 
1518  BranchInst *InnerLoopLatchPredecessorBI =
1519  dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1520  BranchInst *OuterLoopPredecessorBI =
1521  dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1522 
1523  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1524  return false;
1525  BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1526  if (!InnerLoopHeaderSuccessor)
1527  return false;
1528 
1529  // Adjust Loop Preheader and headers.
1530  // The branches in the outer loop predecessor and the outer loop header can
1531  // be unconditional branches or conditional branches with duplicates. Consider
1532  // this when updating the successors.
1533  updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1534  InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1535  // The outer loop header might or might not branch to the outer latch.
1536  // We are guaranteed to branch to the inner loop preheader.
1537  if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch))
1538  updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates,
1539  /*MustUpdateOnce=*/false);
1540  updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1541  InnerLoopHeaderSuccessor, DTUpdates,
1542  /*MustUpdateOnce=*/false);
1543 
1544  // Adjust reduction PHI's now that the incoming block has changed.
1545  InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1546  OuterLoopHeader);
1547 
1548  updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1549  OuterLoopPreHeader, DTUpdates);
1550 
1551  // -------------Adjust loop latches-----------
1552  if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1553  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1554  else
1555  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1556 
1557  updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1558  InnerLoopLatchSuccessor, DTUpdates);
1559 
1560 
1561  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1562  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1563  else
1564  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1565 
1566  updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1567  OuterLoopLatchSuccessor, DTUpdates);
1568  updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1569  DTUpdates);
1570 
1571  DT->applyUpdates(DTUpdates);
1572  restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1573  OuterLoopPreHeader);
1574 
1575  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1576  OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1577  InnerLoop, LI);
1578  // For PHIs in the exit block of the outer loop, outer's latch has been
1579  // replaced by Inners'.
1580  OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1581 
1582  // Now update the reduction PHIs in the inner and outer loop headers.
1583  SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1584  for (PHINode &PHI : drop_begin(InnerLoopHeader->phis()))
1585  InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
1586  for (PHINode &PHI : drop_begin(OuterLoopHeader->phis()))
1587  OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
1588 
1589  auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1590  (void)OuterInnerReductions;
1591 
1592  // Now move the remaining reduction PHIs from outer to inner loop header and
1593  // vice versa. The PHI nodes must be part of a reduction across the inner and
1594  // outer loop and all the remains to do is and updating the incoming blocks.
1595  for (PHINode *PHI : OuterLoopPHIs) {
1596  PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1597  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1598  }
1599  for (PHINode *PHI : InnerLoopPHIs) {
1600  PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1601  assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1602  }
1603 
1604  // Update the incoming blocks for moved PHI nodes.
1605  OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1606  OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1607  InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1608  InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1609 
1610  // Values defined in the outer loop header could be used in the inner loop
1611  // latch. In that case, we need to create LCSSA phis for them, because after
1612  // interchanging they will be defined in the new inner loop and used in the
1613  // new outer loop.
1614  IRBuilder<> Builder(OuterLoopHeader->getContext());
1615  SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1616  for (Instruction &I :
1617  make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1618  MayNeedLCSSAPhis.push_back(&I);
1619  formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1620 
1621  return true;
1622 }
1623 
1624 bool LoopInterchangeTransform::adjustLoopLinks() {
1625  // Adjust all branches in the inner and outer loop.
1626  bool Changed = adjustLoopBranches();
1627  if (Changed) {
1628  // We have interchanged the preheaders so we need to interchange the data in
1629  // the preheaders as well. This is because the content of the inner
1630  // preheader was previously executed inside the outer loop.
1631  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1632  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1633  swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1634  }
1635  return Changed;
1636 }
1637 
1638 /// Main LoopInterchange Pass.
1640  static char ID;
1641 
1644  }
1645 
1646  void getAnalysisUsage(AnalysisUsage &AU) const override {
1649 
1651  }
1652 
1653  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1654  if (skipLoop(L))
1655  return false;
1656 
1657  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1658  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1659  auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1660  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1661  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1662 
1663  return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
1664  }
1665 };
1666 
1668 
1670  "Interchanges loops for cache reuse", false, false)
1674 
1676  "Interchanges loops for cache reuse", false, false)
1677 
1679  return new LoopInterchangeLegacyPass();
1680 }
1681 
1683  LoopAnalysisManager &AM,
1685  LPMUpdater &U) {
1686  Function &F = *LN.getParent();
1687 
1688  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1690  if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
1691  return PreservedAnalyses::all();
1693 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::initializeLoopInterchangeLegacyPassPass
void initializeLoopInterchangeLegacyPassPass(PassRegistry &)
isLegalToInterChangeLoops
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
Definition: LoopInterchange.cpp:256
moveBBContents
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
Definition: LoopInterchange.cpp:1332
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
followLCSSA
static Value * followLCSSA(Value *SV)
Definition: LoopInterchange.cpp:651
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:729
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:63
llvm
This class represents lattice values for constants.
Definition: AllocatorList.h:23
llvm::LoopBase::getUniqueExitBlock
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:138
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:275
llvm::BranchInst::successors
iterator_range< succ_op_iterator > successors()
Definition: Instructions.h:3115
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:82
llvm::PHINode::incoming_values
op_range incoming_values()
Definition: Instructions.h:2656
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition: IVDescriptors.cpp:618
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:378
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:229
Scalar.h
llvm::DependenceAnalysisWrapperPass
Legacy pass manager pass to access dependence information.
Definition: DependenceAnalysis.h:979
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:529
llvm::cfg::UpdateKind::Insert
@ Insert
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopInterchange.cpp:55
uses
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the uses
Definition: README-SSE.txt:258
llvm::SmallVector< Loop *, 8 >
Statistic.h
llvm::LoopBase::getLoopPredecessor
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
Definition: LoopInfoImpl.h:195
llvm::Dependence::DVEntry::EQ
@ EQ
Definition: DependenceAnalysis.h:91
ErrorHandling.h
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:626
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1001
moveLCSSAPhis
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
Definition: LoopInterchange.cpp:1386
OptimizationRemarkEmitter.h
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:83
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:150
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
isOuterMostDepPositive
static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
Definition: LoopInterchange.cpp:195
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:338
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::cfg::UpdateKind::Delete
@ Delete
interchange
loop interchange
Definition: LoopInterchange.cpp:1675
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:154
findInnerReductionPhi
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
Definition: LoopInterchange.cpp:662
llvm::SmallPtrSet< PHINode *, 4 >
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:128
LoopInterchangeLegacyPass::LoopInterchangeLegacyPass
LoopInterchangeLegacyPass()
Definition: LoopInterchange.cpp:1642
llvm::codeview::VFTableSlotKind::Outer
@ Outer
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
LoopInterchangeLegacyPass
Main LoopInterchange Pass.
Definition: LoopInterchange.cpp:1639
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:260
llvm::SCEVConstant::getValue
ConstantInt * getValue() const
Definition: ScalarEvolutionExpressions.h:56
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2669
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1588
loops
loops
Definition: LoopInfo.cpp:1072
INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:302
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::Dependence::DVEntry::GE
@ GE
Definition: DependenceAnalysis.h:95
Instruction.h
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::LoopBase::getSubLoops
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:143
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1498
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:257
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::createLoopInterchangePass
Pass * createLoopInterchangePass()
Definition: LoopInterchange.cpp:1678
Constants.h
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2666
areInnerLoopExitPHIsSupported
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
Definition: LoopInterchange.cpp:898
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
InstrTypes.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:395
MaxLoopNestDepth
static const unsigned MaxLoopNestDepth
Definition: LoopInterchange.cpp:76
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
swapBBContents
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
Definition: LoopInterchange.cpp:1341
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
populateWorklist
static LoopVector populateWorklist(Loop &L)
Definition: LoopInterchange.cpp:272
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
false
Definition: StackSlotColoring.cpp:142
populateDependencyMatrix
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI)
Definition: LoopInterchange.cpp:88
llvm::Instruction
Definition: Instruction.h:45
isProfitableForVectorization
static bool isProfitableForVectorization(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix)
Definition: LoopInterchange.cpp:1088
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
LoopUtils.h
llvm::LPPassManager
Definition: LoopPass.h:75
LoopInterchangeCostThreshold
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_END(LoopInterchangeLegacyPass
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:212
llvm::LoopBase::removeBlockFromLoop
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
Definition: LoopInfo.h:460
Utils.h
getInductionVariable
static PHINode * getInductionVariable(Loop *L, ScalarEvolution *SE)
Definition: LoopInterchange.cpp:294
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2662
Type.h
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:697
LoopInfo.h
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:3877
llvm::PHINode::getIncomingValueNumForOperand
static unsigned getIncomingValueNumForOperand(unsigned i)
Definition: Instructions.h:2680
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:123
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1419
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:272
llvm::LoopPass
Definition: LoopPass.h:27
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:243
llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition: Instructions.h:2705
containsNoDependence
static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
Definition: LoopInterchange.cpp:208
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::Dependence::DVEntry::GT
@ GT
Definition: DependenceAnalysis.h:93
LoopInterchangeLegacyPass::ID
static char ID
Definition: LoopInterchange.cpp:1640
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:963
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:905
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:581
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::Dependence::DVEntry::LE
@ LE
Definition: DependenceAnalysis.h:92
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1563
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
LoopInterchangeLegacyPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: LoopInterchange.cpp:1646
validDepInterchange
static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, unsigned OuterLoopId, char InnerDep, char OuterDep)
Definition: LoopInterchange.cpp:218
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::Dependence::DVEntry::LT
@ LT
Definition: DependenceAnalysis.h:90
llvm::Instruction::mayHaveSideEffects
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:626
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:944
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:643
LoopInterchange.h
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:781
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:404
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1079
llvm::BasicBlock::instructionsWithoutDebug
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=false) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:100
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1505
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
this
Analysis the ScalarEvolution expression for r is this
Definition: README.txt:8
LoopPass.h
llvm::Loop::getCanonicalInductionVariable
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:149
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1595
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:202
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:272
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::LoopNest::getParent
Function * getParent() const
Return the function to which the loop-nest belongs.
Definition: LoopNestAnalysis.h:143
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:295
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
areOuterLoopExitPHIsSupported
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition: LoopInterchange.cpp:923
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: IVDescriptors.cpp:1147
llvm::BasicBlock::replacePhiUsesWith
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: BasicBlock.cpp:440
llvm::map_range
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:311
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:138
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7084
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:363
interChangeDependencies
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
Definition: LoopInterchange.cpp:187
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:454
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:302
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:151
reuse
loop Interchanges loops for cache reuse
Definition: LoopInterchange.cpp:1676
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Direction
Loop::LoopBounds::Direction Direction
Definition: LoopInfo.cpp:229
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:3743
llvm::LoopBase::addBlockEntry
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
Definition: LoopInfo.h:423
transform
instcombine should handle this transform
Definition: README.txt:262
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:250
updateSuccessor
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
Definition: LoopInterchange.cpp:1360
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:99
ScalarEvolutionExpressions.h
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:66
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::DependenceInfo::depends
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Definition: DependenceAnalysis.cpp:3481
llvm::formLCSSAForInstructions
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:79
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:684
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::ConstantInt::isNegative
bool isNegative() const
Definition: Constants.h:197
SmallVector.h
User.h
llvm::LoopInfoBase::changeTopLevelLoop
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
Definition: LoopInfo.h:1011
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2686
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::PHINode
Definition: Instructions.h:2572
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::LoopInterchangePass::run
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopInterchange.cpp:1682
llvm::SmallPtrSetImpl< PHINode * >
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:43
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
DependenceAnalysis.h
LoopInterchangeLegacyPass::runOnLoop
bool runOnLoop(Loop *L, LPPassManager &LPM) override
Definition: LoopInterchange.cpp:1653
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
MaxMemInstrCount
static const unsigned MaxMemInstrCount
Definition: LoopInterchange.cpp:73
llvm::cl::desc
Definition: CommandLine.h:411
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3005
raw_ostream.h
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:6916
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:599
Value.h
vector
DEBUG_TYPE to vector
Definition: AMDGPUPromoteAlloca.cpp:132
InitializePasses.h
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:27
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:424
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3098
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:369
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:96
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37