LLVM  4.0.0
LoopInterchange.cpp
Go to the documentation of this file.
1 //===- LoopInterchange.cpp - Loop interchange pass------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This Pass handles loop interchange transform.
11 // This pass interchanges loops to provide a more cache-friendly memory access
12 // patterns.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/Analysis/LoopPass.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/InstIterator.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Module.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Support/Debug.h"
39 #include "llvm/Transforms/Scalar.h"
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "loop-interchange"
46 
48  "loop-interchange-threshold", cl::init(0), cl::Hidden,
49  cl::desc("Interchange if you gain more than this number"));
50 
51 namespace {
52 
53 typedef SmallVector<Loop *, 8> LoopVector;
54 
55 // TODO: Check if we can use a sparse matrix here.
56 typedef std::vector<std::vector<char>> CharMatrix;
57 
58 // Maximum number of dependencies that can be handled in the dependency matrix.
59 static const unsigned MaxMemInstrCount = 100;
60 
61 // Maximum loop depth supported.
62 static const unsigned MaxLoopNestDepth = 10;
63 
64 struct LoopInterchange;
65 
66 #ifdef DUMP_DEP_MATRICIES
67 void printDepMatrix(CharMatrix &DepMatrix) {
68  for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) {
69  std::vector<char> Vec = *I;
70  for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II)
71  DEBUG(dbgs() << *II << " ");
72  DEBUG(dbgs() << "\n");
73  }
74 }
75 #endif
76 
77 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
78  Loop *L, DependenceInfo *DI) {
79  typedef SmallVector<Value *, 16> ValueVector;
80  ValueVector MemInstr;
81 
82  // For each block.
83  for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
84  BB != BE; ++BB) {
85  // Scan the BB and collect legal loads and stores.
86  for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E;
87  ++I) {
88  if (!isa<Instruction>(I))
89  return false;
90  if (LoadInst *Ld = dyn_cast<LoadInst>(I)) {
91  if (!Ld->isSimple())
92  return false;
93  MemInstr.push_back(&*I);
94  } else if (StoreInst *St = dyn_cast<StoreInst>(I)) {
95  if (!St->isSimple())
96  return false;
97  MemInstr.push_back(&*I);
98  }
99  }
100  }
101 
102  DEBUG(dbgs() << "Found " << MemInstr.size()
103  << " Loads and Stores to analyze\n");
104 
105  ValueVector::iterator I, IE, J, JE;
106 
107  for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
108  for (J = I, JE = MemInstr.end(); J != JE; ++J) {
109  std::vector<char> Dep;
110  Instruction *Src = cast<Instruction>(*I);
111  Instruction *Dst = cast<Instruction>(*J);
112  if (Src == Dst)
113  continue;
114  // Ignore Input dependencies.
115  if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
116  continue;
117  // Track Output, Flow, and Anti dependencies.
118  if (auto D = DI->depends(Src, Dst, true)) {
119  assert(D->isOrdered() && "Expected an output, flow or anti dep.");
120  DEBUG(StringRef DepType =
121  D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
122  dbgs() << "Found " << DepType
123  << " dependency between Src and Dst\n"
124  << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
125  unsigned Levels = D->getLevels();
126  char Direction;
127  for (unsigned II = 1; II <= Levels; ++II) {
128  const SCEV *Distance = D->getDistance(II);
129  const SCEVConstant *SCEVConst =
130  dyn_cast_or_null<SCEVConstant>(Distance);
131  if (SCEVConst) {
132  const ConstantInt *CI = SCEVConst->getValue();
133  if (CI->isNegative())
134  Direction = '<';
135  else if (CI->isZero())
136  Direction = '=';
137  else
138  Direction = '>';
139  Dep.push_back(Direction);
140  } else if (D->isScalar(II)) {
141  Direction = 'S';
142  Dep.push_back(Direction);
143  } else {
144  unsigned Dir = D->getDirection(II);
145  if (Dir == Dependence::DVEntry::LT ||
147  Direction = '<';
148  else if (Dir == Dependence::DVEntry::GT ||
150  Direction = '>';
151  else if (Dir == Dependence::DVEntry::EQ)
152  Direction = '=';
153  else
154  Direction = '*';
155  Dep.push_back(Direction);
156  }
157  }
158  while (Dep.size() != Level) {
159  Dep.push_back('I');
160  }
161 
162  DepMatrix.push_back(Dep);
163  if (DepMatrix.size() > MaxMemInstrCount) {
164  DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
165  << " dependencies inside loop\n");
166  return false;
167  }
168  }
169  }
170  }
171 
172  // We don't have a DepMatrix to check legality return false.
173  if (DepMatrix.size() == 0)
174  return false;
175  return true;
176 }
177 
// The loops at indices 'FromIndx' and 'ToIndx' trade places in the nest.
// Mirror that in the dependence matrix by exchanging the two corresponding
// columns in every row. Both indices must be valid for every row.
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
                                    unsigned ToIndx) {
  for (std::vector<char> &Row : DepMatrix)
    std::swap(Row[FromIndx], Row[ToIndx]);
}
189 
// Scans columns 0 through 'Column' (inclusive) of row 'Row' and reports
// whether the first entry that is either '<' or '>' is a '>'. Returns false
// when a '<' comes first or when neither character occurs in that range.
static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
                                   unsigned Column) {
  const std::vector<char> &Deps = DepMatrix[Row];
  unsigned Idx = 0;
  while (Idx <= Column) {
    const char Dir = Deps[Idx];
    if (Dir == '>')
      return true;
    if (Dir == '<')
      return false;
    ++Idx;
  }
  // Neither '<' nor '>' was seen up to and including 'Column'.
  return false;
}
203 
// Returns true when every entry of row 'Row' strictly before 'Column' is one
// of '=', 'S' or 'I'; any other character in that range yields false.
static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
                                 unsigned Column) {
  for (unsigned Idx = 0; Idx != Column; ++Idx) {
    switch (DepMatrix[Row][Idx]) {
    case '=':
    case 'S':
    case 'I':
      break;
    default:
      return false;
    }
  }
  return true;
}
214 
215 static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
216  unsigned OuterLoopId, char InnerDep,
217  char OuterDep) {
218 
219  if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
220  return false;
221 
222  if (InnerDep == OuterDep)
223  return true;
224 
225  // It is legal to interchange if and only if after interchange no row has a
226  // '>' direction as the leftmost non-'='.
227 
228  if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
229  return true;
230 
231  if (InnerDep == '<')
232  return true;
233 
234  if (InnerDep == '>') {
235  // If OuterLoopId represents outermost loop then interchanging will make the
236  // 1st dependency as '>'
237  if (OuterLoopId == 0)
238  return false;
239 
240  // If all dependencies before OuterloopId are '=','S'or 'I'. Then
241  // interchanging will result in this row having an outermost non '='
242  // dependency of '>'
243  if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
244  return true;
245  }
246 
247  return false;
248 }
249 
250 // Checks if it is legal to interchange 2 loops.
251 // [Theorem] A permutation of the loops in a perfect nest is legal if and only
252 // if the direction matrix, after the same permutation is applied to its
253 // columns, has no ">" direction as the leftmost non-"=" direction in any row.
254 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
255  unsigned InnerLoopId,
256  unsigned OuterLoopId) {
257 
258  unsigned NumRows = DepMatrix.size();
259  // For each row check if it is valid to interchange.
260  for (unsigned Row = 0; Row < NumRows; ++Row) {
261  char InnerDep = DepMatrix[Row][InnerLoopId];
262  char OuterDep = DepMatrix[Row][OuterLoopId];
263  if (InnerDep == '*' || OuterDep == '*')
264  return false;
265  if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
266  return false;
267  }
268  return true;
269 }
270 
271 static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
272 
273  DEBUG(dbgs() << "Calling populateWorklist on Func: "
274  << L.getHeader()->getParent()->getName() << " Loop: %"
275  << L.getHeader()->getName() << '\n');
276  LoopVector LoopList;
277  Loop *CurrentLoop = &L;
278  const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
279  while (!Vec->empty()) {
280  // The current loop has multiple subloops in it hence it is not tightly
281  // nested.
282  // Discard all loops above it added into Worklist.
283  if (Vec->size() != 1) {
284  LoopList.clear();
285  return;
286  }
287  LoopList.push_back(CurrentLoop);
288  CurrentLoop = Vec->front();
289  Vec = &CurrentLoop->getSubLoops();
290  }
291  LoopList.push_back(CurrentLoop);
292  V.push_back(std::move(LoopList));
293 }
294 
295 static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
296  PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
297  if (InnerIndexVar)
298  return InnerIndexVar;
299  if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
300  return nullptr;
301  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
302  PHINode *PhiVar = cast<PHINode>(I);
303  Type *PhiTy = PhiVar->getType();
304  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
305  !PhiTy->isPointerTy())
306  return nullptr;
307  const SCEVAddRecExpr *AddRec =
308  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
309  if (!AddRec || !AddRec->isAffine())
310  continue;
311  const SCEV *Step = AddRec->getStepRecurrence(*SE);
312  if (!isa<SCEVConstant>(Step))
313  continue;
314  // Found the induction variable.
315  // FIXME: Handle loops with more than one induction variable. Note that,
316  // currently, legality makes sure we have only one induction variable.
317  return PhiVar;
318  }
319  return nullptr;
320 }
321 
322 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
323 class LoopInterchangeLegality {
324 public:
325  LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
326  LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA)
327  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
328  PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {}
329 
330  /// Check if the loops can be interchanged.
331  bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
332  CharMatrix &DepMatrix);
333  /// Check if the loop structure is understood. We do not handle triangular
334  /// loops for now.
335  bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
336 
337  bool currentLimitations();
338 
339  bool hasInnerLoopReduction() { return InnerLoopHasReduction; }
340 
341 private:
342  bool tightlyNested(Loop *Outer, Loop *Inner);
343  bool containsUnsafeInstructionsInHeader(BasicBlock *BB);
344  bool areAllUsesReductions(Instruction *Ins, Loop *L);
345  bool containsUnsafeInstructionsInLatch(BasicBlock *BB);
346  bool findInductionAndReductions(Loop *L,
347  SmallVector<PHINode *, 8> &Inductions,
348  SmallVector<PHINode *, 8> &Reductions);
349  Loop *OuterLoop;
350  Loop *InnerLoop;
351 
352  ScalarEvolution *SE;
353  LoopInfo *LI;
354  DominatorTree *DT;
355  bool PreserveLCSSA;
356 
357  bool InnerLoopHasReduction;
358 };
359 
360 /// LoopInterchangeProfitability checks if it is profitable to interchange the
361 /// loop.
362 class LoopInterchangeProfitability {
363 public:
364  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE)
365  : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {}
366 
367  /// Check if the loop interchange is profitable.
368  bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
369  CharMatrix &DepMatrix);
370 
371 private:
372  int getInstrOrderCost();
373 
374  Loop *OuterLoop;
375  Loop *InnerLoop;
376 
377  /// Scev analysis.
378  ScalarEvolution *SE;
379 };
380 
381 /// LoopInterchangeTransform interchanges the loop.
382 class LoopInterchangeTransform {
383 public:
384  LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
385  LoopInfo *LI, DominatorTree *DT,
386  BasicBlock *LoopNestExit,
387  bool InnerLoopContainsReductions)
388  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
389  LoopExit(LoopNestExit),
390  InnerLoopHasReduction(InnerLoopContainsReductions) {}
391 
392  /// Interchange OuterLoop and InnerLoop.
393  bool transform();
394  void restructureLoops(Loop *InnerLoop, Loop *OuterLoop);
395  void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
396 
397 private:
398  void splitInnerLoopLatch(Instruction *);
399  void splitInnerLoopHeader();
400  bool adjustLoopLinks();
401  void adjustLoopPreheaders();
402  bool adjustLoopBranches();
403  void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
404  BasicBlock *NewPred);
405 
406  Loop *OuterLoop;
407  Loop *InnerLoop;
408 
409  /// Scev analysis.
410  ScalarEvolution *SE;
411  LoopInfo *LI;
412  DominatorTree *DT;
413  BasicBlock *LoopExit;
414  bool InnerLoopHasReduction;
415 };
416 
417 // Main LoopInterchange Pass.
418 struct LoopInterchange : public FunctionPass {
419  static char ID;
420  ScalarEvolution *SE;
421  LoopInfo *LI;
422  DependenceInfo *DI;
423  DominatorTree *DT;
424  bool PreserveLCSSA;
425  LoopInterchange()
426  : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) {
428  }
429 
430  void getAnalysisUsage(AnalysisUsage &AU) const override {
438  }
439 
440  bool runOnFunction(Function &F) override {
441  if (skipFunction(F))
442  return false;
443 
444  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
445  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
446  DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
447  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
448  DT = DTWP ? &DTWP->getDomTree() : nullptr;
449  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
450 
451  // Build up a worklist of loop pairs to analyze.
453 
454  for (Loop *L : *LI)
455  populateWorklist(*L, Worklist);
456 
457  DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n");
458  bool Changed = true;
459  while (!Worklist.empty()) {
460  LoopVector LoopList = Worklist.pop_back_val();
461  Changed = processLoopList(LoopList, F);
462  }
463  return Changed;
464  }
465 
466  bool isComputableLoopNest(LoopVector LoopList) {
467  for (Loop *L : LoopList) {
468  const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
469  if (ExitCountOuter == SE->getCouldNotCompute()) {
470  DEBUG(dbgs() << "Couldn't compute backedge count\n");
471  return false;
472  }
473  if (L->getNumBackEdges() != 1) {
474  DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
475  return false;
476  }
477  if (!L->getExitingBlock()) {
478  DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
479  return false;
480  }
481  }
482  return true;
483  }
484 
485  unsigned selectLoopForInterchange(const LoopVector &LoopList) {
486  // TODO: Add a better heuristic to select the loop to be interchanged based
487  // on the dependence matrix. Currently we select the innermost loop.
488  return LoopList.size() - 1;
489  }
490 
491  bool processLoopList(LoopVector LoopList, Function &F) {
492 
493  bool Changed = false;
494  unsigned LoopNestDepth = LoopList.size();
495  if (LoopNestDepth < 2) {
496  DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
497  return false;
498  }
499  if (LoopNestDepth > MaxLoopNestDepth) {
500  DEBUG(dbgs() << "Cannot handle loops of depth greater than "
501  << MaxLoopNestDepth << "\n");
502  return false;
503  }
504  if (!isComputableLoopNest(LoopList)) {
505  DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
506  return false;
507  }
508 
509  DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth << "\n");
510 
511  CharMatrix DependencyMatrix;
512  Loop *OuterMostLoop = *(LoopList.begin());
513  if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
514  OuterMostLoop, DI)) {
515  DEBUG(dbgs() << "Populating dependency matrix failed\n");
516  return false;
517  }
518 #ifdef DUMP_DEP_MATRICIES
519  DEBUG(dbgs() << "Dependence before interchange\n");
520  printDepMatrix(DependencyMatrix);
521 #endif
522 
523  BasicBlock *OuterMostLoopLatch = OuterMostLoop->getLoopLatch();
524  BranchInst *OuterMostLoopLatchBI =
525  dyn_cast<BranchInst>(OuterMostLoopLatch->getTerminator());
526  if (!OuterMostLoopLatchBI)
527  return false;
528 
529  // Since we currently do not handle LCSSA PHI's any failure in loop
530  // condition will now branch to LoopNestExit.
531  // TODO: This should be removed once we handle LCSSA PHI nodes.
532 
533  // Get the Outermost loop exit.
534  BasicBlock *LoopNestExit;
535  if (OuterMostLoopLatchBI->getSuccessor(0) == OuterMostLoop->getHeader())
536  LoopNestExit = OuterMostLoopLatchBI->getSuccessor(1);
537  else
538  LoopNestExit = OuterMostLoopLatchBI->getSuccessor(0);
539 
540  if (isa<PHINode>(LoopNestExit->begin())) {
541  DEBUG(dbgs() << "PHI Nodes in loop nest exit is not handled for now "
542  "since on failure all loops branch to loop nest exit.\n");
543  return false;
544  }
545 
546  unsigned SelecLoopId = selectLoopForInterchange(LoopList);
547  // Move the selected loop outwards to the best possible position.
548  for (unsigned i = SelecLoopId; i > 0; i--) {
549  bool Interchanged =
550  processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
551  if (!Interchanged)
552  return Changed;
553  // Loops interchanged reflect the same in LoopList
554  std::swap(LoopList[i - 1], LoopList[i]);
555 
556  // Update the DependencyMatrix
557  interChangeDependencies(DependencyMatrix, i, i - 1);
558  DT->recalculate(F);
559 #ifdef DUMP_DEP_MATRICIES
560  DEBUG(dbgs() << "Dependence after interchange\n");
561  printDepMatrix(DependencyMatrix);
562 #endif
563  Changed |= Interchanged;
564  }
565  return Changed;
566  }
567 
568  bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
569  unsigned OuterLoopId, BasicBlock *LoopNestExit,
570  std::vector<std::vector<char>> &DependencyMatrix) {
571 
572  DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId
573  << " and OuterLoopId = " << OuterLoopId << "\n");
574  Loop *InnerLoop = LoopList[InnerLoopId];
575  Loop *OuterLoop = LoopList[OuterLoopId];
576 
577  LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT,
578  PreserveLCSSA);
579  if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
580  DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
581  return false;
582  }
583  DEBUG(dbgs() << "Loops are legal to interchange\n");
584  LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE);
585  if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
586  DEBUG(dbgs() << "Interchanging loops not profitable\n");
587  return false;
588  }
589 
590  LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
591  LoopNestExit, LIL.hasInnerLoopReduction());
592  LIT.transform();
593  DEBUG(dbgs() << "Loops interchanged\n");
594  return true;
595  }
596 };
597 
598 } // end of namespace
599 bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) {
600  return none_of(Ins->users(), [=](User *U) -> bool {
601  auto *UserIns = dyn_cast<PHINode>(U);
603  return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD);
604  });
605 }
606 
607 bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader(
608  BasicBlock *BB) {
609  for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
610  // Load corresponding to reduction PHI's are safe while concluding if
611  // tightly nested.
612  if (LoadInst *L = dyn_cast<LoadInst>(I)) {
613  if (!areAllUsesReductions(L, InnerLoop))
614  return true;
615  } else if (I->mayHaveSideEffects() || I->mayReadFromMemory())
616  return true;
617  }
618  return false;
619 }
620 
621 bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch(
622  BasicBlock *BB) {
623  for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
624  // Stores corresponding to reductions are safe while concluding if tightly
625  // nested.
626  if (StoreInst *L = dyn_cast<StoreInst>(I)) {
627  if (!isa<PHINode>(L->getOperand(0)))
628  return true;
629  } else if (I->mayHaveSideEffects() || I->mayReadFromMemory())
630  return true;
631  }
632  return false;
633 }
634 
635 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
636  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
637  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
638  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
639 
640  DEBUG(dbgs() << "Checking if loops are tightly nested\n");
641 
642  // A perfectly nested loop will not have any branch in between the outer and
643  // inner block i.e. outer header will branch to either inner preheader and
644  // outerloop latch.
645  BranchInst *OuterLoopHeaderBI =
646  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
647  if (!OuterLoopHeaderBI)
648  return false;
649 
650  for (unsigned i = 0, e = OuterLoopHeaderBI->getNumSuccessors(); i < e; ++i) {
651  if (OuterLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader &&
652  OuterLoopHeaderBI->getSuccessor(i) != OuterLoopLatch)
653  return false;
654  }
655 
656  DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
657  // We do not have any basic block in between now make sure the outer header
658  // and outer loop latch doesn't contain any unsafe instructions.
659  if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
660  containsUnsafeInstructionsInLatch(OuterLoopLatch))
661  return false;
662 
663  DEBUG(dbgs() << "Loops are perfectly nested\n");
664  // We have a perfect loop nest.
665  return true;
666 }
667 
668 
669 bool LoopInterchangeLegality::isLoopStructureUnderstood(
670  PHINode *InnerInduction) {
671 
672  unsigned Num = InnerInduction->getNumOperands();
673  BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
674  for (unsigned i = 0; i < Num; ++i) {
675  Value *Val = InnerInduction->getOperand(i);
676  if (isa<Constant>(Val))
677  continue;
678  Instruction *I = dyn_cast<Instruction>(Val);
679  if (!I)
680  return false;
681  // TODO: Handle triangular loops.
682  // e.g. for(int i=0;i<N;i++)
683  // for(int j=i;j<N;j++)
684  unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
685  if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
686  InnerLoopPreheader &&
687  !OuterLoop->isLoopInvariant(I)) {
688  return false;
689  }
690  }
691  return true;
692 }
693 
694 bool LoopInterchangeLegality::findInductionAndReductions(
695  Loop *L, SmallVector<PHINode *, 8> &Inductions,
696  SmallVector<PHINode *, 8> &Reductions) {
697  if (!L->getLoopLatch() || !L->getLoopPredecessor())
698  return false;
699  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
702  PHINode *PHI = cast<PHINode>(I);
703  if (InductionDescriptor::isInductionPHI(PHI, L, SE, ID))
704  Inductions.push_back(PHI);
705  else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
706  Reductions.push_back(PHI);
707  else {
708  DEBUG(
709  dbgs() << "Failed to recognize PHI as an induction or reduction.\n");
710  return false;
711  }
712  }
713  return true;
714 }
715 
716 static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
717  for (auto I = Block->begin(); isa<PHINode>(I); ++I) {
718  PHINode *PHI = cast<PHINode>(I);
719  // Reduction lcssa phi will have only 1 incoming block that from loop latch.
720  if (PHI->getNumIncomingValues() > 1)
721  return false;
723  if (!Ins)
724  return false;
725  // Incoming value for lcssa phi's in outer loop exit can only be inner loop
726  // exits lcssa phi else it would not be tightly nested.
727  if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
728  return false;
729  }
730  return true;
731 }
732 
734  BasicBlock *LoopHeader) {
735  if (BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator())) {
736  unsigned Num = BI->getNumSuccessors();
737  assert(Num == 2);
738  for (unsigned i = 0; i < Num; ++i) {
739  if (BI->getSuccessor(i) == LoopHeader)
740  continue;
741  return BI->getSuccessor(i);
742  }
743  }
744  return nullptr;
745 }
746 
747 // This function indicates the current limitations in the transform as a result
748 // of which we do not proceed.
749 bool LoopInterchangeLegality::currentLimitations() {
750 
751  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
752  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
753  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
754  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
755  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
756 
757  PHINode *InnerInductionVar;
758  SmallVector<PHINode *, 8> Inductions;
759  SmallVector<PHINode *, 8> Reductions;
760  if (!findInductionAndReductions(InnerLoop, Inductions, Reductions))
761  return true;
762 
763  // TODO: Currently we handle only loops with 1 induction variable.
764  if (Inductions.size() != 1) {
765  DEBUG(dbgs() << "We currently only support loops with 1 induction variable."
766  << "Failed to interchange due to current limitation\n");
767  return true;
768  }
769  if (Reductions.size() > 0)
770  InnerLoopHasReduction = true;
771 
772  InnerInductionVar = Inductions.pop_back_val();
773  Reductions.clear();
774  if (!findInductionAndReductions(OuterLoop, Inductions, Reductions))
775  return true;
776 
777  // Outer loop cannot have reduction because then loops will not be tightly
778  // nested.
779  if (!Reductions.empty())
780  return true;
781  // TODO: Currently we handle only loops with 1 induction variable.
782  if (Inductions.size() != 1)
783  return true;
784 
785  // TODO: Triangular loops are not handled for now.
786  if (!isLoopStructureUnderstood(InnerInductionVar)) {
787  DEBUG(dbgs() << "Loop structure not understood by pass\n");
788  return true;
789  }
790 
791  // TODO: We only handle LCSSA PHI's corresponding to reduction for now.
792  BasicBlock *LoopExitBlock =
793  getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader);
794  if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true))
795  return true;
796 
797  LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader);
798  if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false))
799  return true;
800 
801  // TODO: Current limitation: Since we split the inner loop latch at the point
802  // were induction variable is incremented (induction.next); We cannot have
803  // more than 1 user of induction.next since it would result in broken code
804  // after split.
805  // e.g.
806  // for(i=0;i<N;i++) {
807  // for(j = 0;j<M;j++) {
808  // A[j+1][i+2] = A[j][i]+k;
809  // }
810  // }
811  Instruction *InnerIndexVarInc = nullptr;
812  if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
813  InnerIndexVarInc =
814  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
815  else
816  InnerIndexVarInc =
817  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
818 
819  if (!InnerIndexVarInc)
820  return true;
821 
822  // Since we split the inner loop latch on this induction variable. Make sure
823  // we do not have any instruction between the induction variable and branch
824  // instruction.
825 
826  bool FoundInduction = false;
827  for (const Instruction &I : reverse(*InnerLoopLatch)) {
828  if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I))
829  continue;
830  // We found an instruction. If this is not induction variable then it is not
831  // safe to split this loop latch.
832  if (!I.isIdenticalTo(InnerIndexVarInc))
833  return true;
834 
835  FoundInduction = true;
836  break;
837  }
838  // The loop latch ended and we didn't find the induction variable return as
839  // current limitation.
840  if (!FoundInduction)
841  return true;
842 
843  return false;
844 }
845 
846 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
847  unsigned OuterLoopId,
848  CharMatrix &DepMatrix) {
849 
850  if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
851  DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
852  << " and OuterLoopId = " << OuterLoopId
853  << " due to dependence\n");
854  return false;
855  }
856 
857  // Create unique Preheaders if we already do not have one.
858  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
859  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
860 
861  // Create a unique outer preheader -
862  // 1) If OuterLoop preheader is not present.
863  // 2) If OuterLoop Preheader is same as OuterLoop Header
864  // 3) If OuterLoop Preheader is same as Header of the previous loop.
865  // 4) If OuterLoop Preheader is Entry node.
866  if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() ||
867  isa<PHINode>(OuterLoopPreHeader->begin()) ||
868  !OuterLoopPreHeader->getUniquePredecessor()) {
869  OuterLoopPreHeader =
870  InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA);
871  }
872 
873  if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() ||
874  InnerLoopPreHeader == OuterLoop->getHeader()) {
875  InnerLoopPreHeader =
876  InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA);
877  }
878 
879  // TODO: The loops could not be interchanged due to current limitations in the
880  // transform module.
881  if (currentLimitations()) {
882  DEBUG(dbgs() << "Not legal because of current transform limitation\n");
883  return false;
884  }
885 
886  // Check if the loops are tightly nested.
887  if (!tightlyNested(OuterLoop, InnerLoop)) {
888  DEBUG(dbgs() << "Loops not tightly nested\n");
889  return false;
890  }
891 
892  return true;
893 }
894 
895 int LoopInterchangeProfitability::getInstrOrderCost() {
896  unsigned GoodOrder, BadOrder;
897  BadOrder = GoodOrder = 0;
898  for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end();
899  BI != BE; ++BI) {
900  for (Instruction &Ins : **BI) {
901  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
902  unsigned NumOp = GEP->getNumOperands();
903  bool FoundInnerInduction = false;
904  bool FoundOuterInduction = false;
905  for (unsigned i = 0; i < NumOp; ++i) {
906  const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
907  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
908  if (!AR)
909  continue;
910 
911  // If we find the inner induction after an outer induction e.g.
912  // for(int i=0;i<N;i++)
913  // for(int j=0;j<N;j++)
914  // A[i][j] = A[i-1][j-1]+k;
915  // then it is a good order.
916  if (AR->getLoop() == InnerLoop) {
917  // We found an InnerLoop induction after OuterLoop induction. It is
918  // a good order.
919  FoundInnerInduction = true;
920  if (FoundOuterInduction) {
921  GoodOrder++;
922  break;
923  }
924  }
925  // If we find the outer induction after an inner induction e.g.
926  // for(int i=0;i<N;i++)
927  // for(int j=0;j<N;j++)
928  // A[j][i] = A[j-1][i-1]+k;
929  // then it is a bad order.
930  if (AR->getLoop() == OuterLoop) {
931  // We found an OuterLoop induction after InnerLoop induction. It is
932  // a bad order.
933  FoundOuterInduction = true;
934  if (FoundInnerInduction) {
935  BadOrder++;
936  break;
937  }
938  }
939  }
940  }
941  }
942  }
943  return GoodOrder - BadOrder;
944 }
945 
/// Returns true when, in every row of \p DepMatrix, the inner loop's entry is
/// 'S' or 'I' and the outer loop's entry is '='. An empty matrix yields true.
static bool isProfitableForVectorization(unsigned InnerLoopId,
                                         unsigned OuterLoopId,
                                         CharMatrix &DepMatrix) {
  // TODO: Improve this heuristic to catch more cases.
  // If the inner loop is loop independent or doesn't carry any dependency it is
  // profitable to move this to outer position.
  for (const std::vector<char> &Deps : DepMatrix) {
    const char InnerDep = Deps[InnerLoopId];
    if (InnerDep != 'S' && InnerDep != 'I')
      return false;
    // TODO: We need to improve this heuristic.
    if (Deps[OuterLoopId] != '=')
      return false;
  }
  // If outer loop has dependence and inner loop is loop independent then it is
  // profitable to interchange to enable parallelism.
  return true;
}
964 
965 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
966  unsigned OuterLoopId,
967  CharMatrix &DepMatrix) {
968 
969  // TODO: Add better profitability checks.
970  // e.g
971  // 1) Construct dependency matrix and move the one with no loop carried dep
972  // inside to enable vectorization.
973 
974  // This is rough cost estimation algorithm. It counts the good and bad order
975  // of induction variables in the instruction and allows reordering if number
976  // of bad orders is more than good.
977  int Cost = getInstrOrderCost();
978  DEBUG(dbgs() << "Cost = " << Cost << "\n");
979  if (Cost < -LoopInterchangeCostThreshold)
980  return true;
981 
982  // It is not profitable as per current cache profitability model. But check if
983  // we can move this loop outside to improve parallelism.
984  bool ImprovesPar =
985  isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
986  return ImprovesPar;
987 }
988 
989 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
990  Loop *InnerLoop) {
991  for (Loop::iterator I = OuterLoop->begin(), E = OuterLoop->end(); I != E;
992  ++I) {
993  if (*I == InnerLoop) {
994  OuterLoop->removeChildLoop(I);
995  return;
996  }
997  }
998  llvm_unreachable("Couldn't find loop");
999 }
1000 
1001 void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
1002  Loop *OuterLoop) {
1003  Loop *OuterLoopParent = OuterLoop->getParentLoop();
1004  if (OuterLoopParent) {
1005  // Remove the loop from its parent loop.
1006  removeChildLoop(OuterLoopParent, OuterLoop);
1007  removeChildLoop(OuterLoop, InnerLoop);
1008  OuterLoopParent->addChildLoop(InnerLoop);
1009  } else {
1010  removeChildLoop(OuterLoop, InnerLoop);
1011  LI->changeTopLevelLoop(OuterLoop, InnerLoop);
1012  }
1013 
1014  while (!InnerLoop->empty())
1015  OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin()));
1016 
1017  InnerLoop->addChildLoop(OuterLoop);
1018 }
1019 
1021  bool Transformed = false;
1022  Instruction *InnerIndexVar;
1023 
1024  if (InnerLoop->getSubLoops().size() == 0) {
1025  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1026  DEBUG(dbgs() << "Calling Split Inner Loop\n");
1027  PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
1028  if (!InductionPHI) {
1029  DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1030  return false;
1031  }
1032 
1033  if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1034  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
1035  else
1036  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
1037 
1038  //
1039  // Split at the place were the induction variable is
1040  // incremented/decremented.
1041  // TODO: This splitting logic may not work always. Fix this.
1042  splitInnerLoopLatch(InnerIndexVar);
1043  DEBUG(dbgs() << "splitInnerLoopLatch done\n");
1044 
1045  // Splits the inner loops phi nodes out into a separate basic block.
1046  splitInnerLoopHeader();
1047  DEBUG(dbgs() << "splitInnerLoopHeader done\n");
1048  }
1049 
1050  Transformed |= adjustLoopLinks();
1051  if (!Transformed) {
1052  DEBUG(dbgs() << "adjustLoopLinks failed\n");
1053  return false;
1054  }
1055 
1056  restructureLoops(InnerLoop, OuterLoop);
1057  return true;
1058 }
1059 
1060 void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
1061  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1062  BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
1063  InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
1064 }
1065 
1066 void LoopInterchangeTransform::splitInnerLoopHeader() {
1067 
1068  // Split the inner loop header out. Here make sure that the reduction PHI's
1069  // stay in the innerloop body.
1070  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1071  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1072  if (InnerLoopHasReduction) {
1073  // FIXME: Check if the induction PHI will always be the first PHI.
1074  BasicBlock *New = InnerLoopHeader->splitBasicBlock(
1075  ++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split");
1076  if (LI)
1077  if (Loop *L = LI->getLoopFor(InnerLoopHeader))
1078  L->addBasicBlockToLoop(New, *LI);
1079 
1080  // Adjust Reduction PHI's in the block.
1082  for (auto I = New->begin(); isa<PHINode>(I); ++I) {
1083  PHINode *PHI = dyn_cast<PHINode>(I);
1084  Value *V = PHI->getIncomingValueForBlock(InnerLoopPreHeader);
1085  PHI->replaceAllUsesWith(V);
1086  PHIVec.push_back((PHI));
1087  }
1088  for (PHINode *P : PHIVec) {
1089  P->eraseFromParent();
1090  }
1091  } else {
1092  SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1093  }
1094 
1095  DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & "
1096  "InnerLoopHeader\n");
1097 }
1098 
1099 /// \brief Move all instructions except the terminator from FromBB right before
1100 /// InsertBefore
1101 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1102  auto &ToList = InsertBefore->getParent()->getInstList();
1103  auto &FromList = FromBB->getInstList();
1104 
1105  ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1106  FromBB->getTerminator()->getIterator());
1107 }
1108 
1109 void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock,
1110  BasicBlock *OldPred,
1111  BasicBlock *NewPred) {
1112  for (auto I = CurrBlock->begin(); isa<PHINode>(I); ++I) {
1113  PHINode *PHI = cast<PHINode>(I);
1114  unsigned Num = PHI->getNumIncomingValues();
1115  for (unsigned i = 0; i < Num; ++i) {
1116  if (PHI->getIncomingBlock(i) == OldPred)
1117  PHI->setIncomingBlock(i, NewPred);
1118  }
1119  }
1120 }
1121 
1122 bool LoopInterchangeTransform::adjustLoopBranches() {
1123 
1124  DEBUG(dbgs() << "adjustLoopBranches called\n");
1125  // Adjust the loop preheader
1126  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1127  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1128  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1129  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1130  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1131  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1132  BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1133  BasicBlock *InnerLoopLatchPredecessor =
1134  InnerLoopLatch->getUniquePredecessor();
1135  BasicBlock *InnerLoopLatchSuccessor;
1136  BasicBlock *OuterLoopLatchSuccessor;
1137 
1138  BranchInst *OuterLoopLatchBI =
1139  dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1140  BranchInst *InnerLoopLatchBI =
1141  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1142  BranchInst *OuterLoopHeaderBI =
1143  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1144  BranchInst *InnerLoopHeaderBI =
1145  dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1146 
1147  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1148  !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1149  !InnerLoopHeaderBI)
1150  return false;
1151 
1152  BranchInst *InnerLoopLatchPredecessorBI =
1153  dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1154  BranchInst *OuterLoopPredecessorBI =
1155  dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1156 
1157  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1158  return false;
1159  BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1160  if (!InnerLoopHeaderSuccessor)
1161  return false;
1162 
1163  // Adjust Loop Preheader and headers
1164 
1165  unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors();
1166  for (unsigned i = 0; i < NumSucc; ++i) {
1167  if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader)
1168  OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader);
1169  }
1170 
1171  NumSucc = OuterLoopHeaderBI->getNumSuccessors();
1172  for (unsigned i = 0; i < NumSucc; ++i) {
1173  if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch)
1174  OuterLoopHeaderBI->setSuccessor(i, LoopExit);
1175  else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader)
1176  OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor);
1177  }
1178 
1179  // Adjust reduction PHI's now that the incoming block has changed.
1180  updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
1181  OuterLoopHeader);
1182 
1183  BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI);
1184  InnerLoopHeaderBI->eraseFromParent();
1185 
1186  // -------------Adjust loop latches-----------
1187  if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1188  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1189  else
1190  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1191 
1192  NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors();
1193  for (unsigned i = 0; i < NumSucc; ++i) {
1194  if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch)
1195  InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor);
1196  }
1197 
1198  // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with
1199  // the value and remove this PHI node from inner loop.
1200  SmallVector<PHINode *, 8> LcssaVec;
1201  for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) {
1202  PHINode *LcssaPhi = cast<PHINode>(I);
1203  LcssaVec.push_back(LcssaPhi);
1204  }
1205  for (PHINode *P : LcssaVec) {
1206  Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch);
1207  P->replaceAllUsesWith(Incoming);
1208  P->eraseFromParent();
1209  }
1210 
1211  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1212  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1213  else
1214  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1215 
1216  if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor)
1217  InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor);
1218  else
1219  InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor);
1220 
1221  updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
1222 
1223  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) {
1224  OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch);
1225  } else {
1226  OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch);
1227  }
1228 
1229  return true;
1230 }
1231 void LoopInterchangeTransform::adjustLoopPreheaders() {
1232 
1233  // We have interchanged the preheaders so we need to interchange the data in
1234  // the preheader as well.
1235  // This is because the content of inner preheader was previously executed
1236  // inside the outer loop.
1237  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1238  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1239  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1240  BranchInst *InnerTermBI =
1241  cast<BranchInst>(InnerLoopPreHeader->getTerminator());
1242 
1243  // These instructions should now be executed inside the loop.
1244  // Move instruction into a new block after outer header.
1245  moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator());
1246  // These instructions were not executed previously in the loop so move them to
1247  // the older inner loop preheader.
1248  moveBBContents(OuterLoopPreHeader, InnerTermBI);
1249 }
1250 
1251 bool LoopInterchangeTransform::adjustLoopLinks() {
1252 
1253  // Adjust all branches in the inner and outer loop.
1254  bool Changed = adjustLoopBranches();
1255  if (Changed)
1256  adjustLoopPreheaders();
1257  return Changed;
1258 }
1259 
// Definition of the pass's static ID member, initialized to 0.
1260 char LoopInterchange::ID = 0;
// Pass registration: the BEGIN/END pair names the pass "loop-interchange" with
// the description "Interchanges loops for cache reuse"; the DEPENDENCY lines
// in between list passes this one depends on.
// NOTE(review): the two trailing `false, false` arguments are presumably the
// CFG-only and is-analysis flags; confirm against PassSupport.h.
1261 INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
1262  "Interchanges loops for cache reuse", false, false)
// NOTE(review): listing lines 1263-1266 and 1269 are absent from this extract;
// further INITIALIZE_PASS_DEPENDENCY entries presumably belong there. Verify
// against the upstream file before relying on this list.
1267 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
1268 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
1270 
1271 INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
1272  "Interchanges loops for cache reuse", false, false)
1273 
1274 Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }
MachineLoop * L
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:185
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
loop Interchanges loops for cache false
void push_back(const T &Elt)
Definition: SmallVector.h:211
BasicBlock * getUniqueSuccessor()
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:261
BasicBlock * getUniquePredecessor()
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:239
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
size_t i
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
Legacy pass manager pass to access dependence information.
unsigned getNumOperands() const
Definition: User.h:167
static bool isProfitableForVectorization(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix)
The main scalar evolution driver.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes)
Returns true if Phi is a reduction in TheLoop.
Definition: LoopUtils.cpp:475
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
LoopT * getParentLoop() const
Definition: LoopInfo.h:103
An instruction for reading from memory.
Definition: Instructions.h:164
Hexagon Common GEP
DependenceInfo - This class is the main dependence-analysis driver.
BlockT * getHeader() const
Definition: LoopInfo.h:102
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:287
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:191
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:228
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one. ...
static GCRegistry::Add< StatepointGC > D("statepoint-example","an example strategy for statepoint")
Instruction * getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:180
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:750
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:55
bool isNegative() const
Definition: Constants.h:193
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:241
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
#define F(x, y, z)
Definition: MD5.cpp:51
static BasicBlock * getLoopLatchExitBlock(BasicBlock *LatchBlock, BasicBlock *LoopHeader)
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:188
This node represents a polynomial recurrence on the trip count of the specified loop.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:279
BasicBlock * getSuccessor(unsigned i) const
An instruction for storing to memory.
Definition: Instructions.h:300
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:401
loop interchange
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:96
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
unsigned getNumIncomingValues() const
Return the number of incoming edges.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:830
const SCEV * getCouldNotCompute()
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
iterator begin() const
Definition: LoopInfo.h:132
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:109
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
Conditional or Unconditional Branch instruction.
char & LCSSAID
Definition: LCSSA.cpp:379
loop Interchanges loops for cache reuse
iterator end() const
Definition: LoopInfo.h:133
Represent the analysis usage information of a pass.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:342
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:249
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
for(unsigned i=0, e=MI->getNumOperands();i!=e;++i)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:52
Value * getOperand(unsigned i) const
Definition: User.h:145
self_iterator getIterator()
Definition: ilist_node.h:81
static unsigned getIncomingValueNumForOperand(unsigned i)
void initializeLoopInterchangePass(PassRegistry &)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
char & LoopSimplifyID
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: LoopUtils.h:63
INITIALIZE_PASS_BEGIN(LoopInterchange,"loop-interchange","Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_END(LoopInterchange
Iterator for intrusive lists based on ilist_node.
static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock)
This is the shared class of boolean and integer constants.
Definition: Constants.h:88
A struct for saving information about induction variables.
Definition: LoopUtils.h:261
void setIncomingBlock(unsigned i, BasicBlock *BB)
Pass * createLoopInterchangePass()
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
iterator end()
Definition: BasicBlock.h:230
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:289
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:110
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:198
ConstantInt * getValue() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
Value * getIncomingValueForBlock(const BasicBlock *BB) const
iterator_range< user_iterator > users()
Definition: Value.h:370
std::vector< BlockT * >::const_iterator block_iterator
Definition: LoopInfo.h:140
block_iterator block_end() const
Definition: LoopInfo.h:142
This class represents an analyzed expression in the program.
unsigned getNumSuccessors() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
#define I(x, y, z)
Definition: MD5.cpp:54
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:124
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it...
Definition: LoopInfoImpl.h:131
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
const Loop * getLoop() const
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:374
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:807
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:71
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool empty() const
Definition: LoopInfo.h:136
#define DEBUG(X)
Definition: Debug.h:100
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:127
block_iterator block_begin() const
Definition: LoopInfo.h:141
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:831
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:217
This pass exposes codegen information to IR-level passes.
std::vector< LoopT * >::const_iterator iterator
Definition: LoopInfo.h:129
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const BasicBlock * getParent() const
Definition: Instruction.h:62
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: LoopUtils.cpp:854
loops
Definition: LoopInfo.cpp:709
This class represents a constant integer value.