LLVM 20.0.0git
LoopInterchange.cpp
Go to the documentation of this file.
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/StringSet.h"
29#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/InstrTypes.h"
34#include "llvm/IR/Instruction.h"
36#include "llvm/IR/User.h"
37#include "llvm/IR/Value.h"
40#include "llvm/Support/Debug.h"
46#include <cassert>
47#include <utility>
48#include <vector>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "loop-interchange"
53
54STATISTIC(LoopsInterchanged, "Number of loops interchanged");
55
57 "loop-interchange-threshold", cl::init(0), cl::Hidden,
58 cl::desc("Interchange if you gain more than this number"));
59
60namespace {
61
62using LoopVector = SmallVector<Loop *, 8>;
63
64// TODO: Check if we can use a sparse matrix here.
65using CharMatrix = std::vector<std::vector<char>>;
66
67} // end anonymous namespace
68
69// Maximum number of dependencies that can be handled in the dependency matrix.
70static const unsigned MaxMemInstrCount = 100;
71
72// Maximum loop depth supported.
73static const unsigned MaxLoopNestDepth = 10;
74
75#ifndef NDEBUG
76static void printDepMatrix(CharMatrix &DepMatrix) {
77 for (auto &Row : DepMatrix) {
78 for (auto D : Row)
79 LLVM_DEBUG(dbgs() << D << " ");
80 LLVM_DEBUG(dbgs() << "\n");
81 }
82}
83#endif
84
85static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
86 Loop *L, DependenceInfo *DI,
87 ScalarEvolution *SE) {
88 using ValueVector = SmallVector<Value *, 16>;
89
90 ValueVector MemInstr;
91
92 // For each block.
93 for (BasicBlock *BB : L->blocks()) {
94 // Scan the BB and collect legal loads and stores.
95 for (Instruction &I : *BB) {
96 if (!isa<Instruction>(I))
97 return false;
98 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
99 if (!Ld->isSimple())
100 return false;
101 MemInstr.push_back(&I);
102 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
103 if (!St->isSimple())
104 return false;
105 MemInstr.push_back(&I);
106 }
107 }
108 }
109
110 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
111 << " Loads and Stores to analyze\n");
112
113 ValueVector::iterator I, IE, J, JE;
114 StringSet<> Seen;
115
116 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
117 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
118 std::vector<char> Dep;
119 Instruction *Src = cast<Instruction>(*I);
120 Instruction *Dst = cast<Instruction>(*J);
121 // Ignore Input dependencies.
122 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
123 continue;
124 // Track Output, Flow, and Anti dependencies.
125 if (auto D = DI->depends(Src, Dst, true)) {
126 assert(D->isOrdered() && "Expected an output, flow or anti dep.");
127 // If the direction vector is negative, normalize it to
128 // make it non-negative.
129 if (D->normalize(SE))
130 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
131 LLVM_DEBUG(StringRef DepType =
132 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
133 dbgs() << "Found " << DepType
134 << " dependency between Src and Dst\n"
135 << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
136 unsigned Levels = D->getLevels();
137 char Direction;
138 for (unsigned II = 1; II <= Levels; ++II) {
139 if (D->isScalar(II)) {
140 Direction = 'S';
141 Dep.push_back(Direction);
142 } else {
143 unsigned Dir = D->getDirection(II);
144 if (Dir == Dependence::DVEntry::LT ||
146 Direction = '<';
147 else if (Dir == Dependence::DVEntry::GT ||
149 Direction = '>';
150 else if (Dir == Dependence::DVEntry::EQ)
151 Direction = '=';
152 else
153 Direction = '*';
154 Dep.push_back(Direction);
155 }
156 }
157 while (Dep.size() != Level) {
158 Dep.push_back('I');
159 }
160
161 // Make sure we only add unique entries to the dependency matrix.
162 if (Seen.insert(StringRef(Dep.data(), Dep.size())).second)
163 DepMatrix.push_back(Dep);
164
165 if (DepMatrix.size() > MaxMemInstrCount) {
166 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
167 << " dependencies inside loop\n");
168 return false;
169 }
170 }
171 }
172 }
173
174 return true;
175}
176
177// A loop is moved from index 'from' to an index 'to'. Update the Dependence
178// matrix by exchanging the two columns.
179static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
180 unsigned ToIndx) {
181 for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
182 std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
183}
184
185// After interchanging, check if the direction vector is valid.
186// [Theorem] A permutation of the loops in a perfect nest is legal if and only
187// if the direction matrix, after the same permutation is applied to its
188// columns, has no ">" direction as the leftmost non-"=" direction in any row.
189static bool isLexicographicallyPositive(std::vector<char> &DV) {
190 for (unsigned char Direction : DV) {
191 if (Direction == '<')
192 return true;
193 if (Direction == '>' || Direction == '*')
194 return false;
195 }
196 return true;
197}
198
199// Checks if it is legal to interchange 2 loops.
200static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
201 unsigned InnerLoopId,
202 unsigned OuterLoopId) {
203 unsigned NumRows = DepMatrix.size();
204 std::vector<char> Cur;
205 // For each row check if it is valid to interchange.
206 for (unsigned Row = 0; Row < NumRows; ++Row) {
207 // Create temporary DepVector check its lexicographical order
208 // before and after swapping OuterLoop vs InnerLoop
209 Cur = DepMatrix[Row];
211 return false;
212 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
214 return false;
215 }
216 return true;
217}
218
219static void populateWorklist(Loop &L, LoopVector &LoopList) {
220 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
221 << L.getHeader()->getParent()->getName() << " Loop: %"
222 << L.getHeader()->getName() << '\n');
223 assert(LoopList.empty() && "LoopList should initially be empty!");
224 Loop *CurrentLoop = &L;
225 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
226 while (!Vec->empty()) {
227 // The current loop has multiple subloops in it hence it is not tightly
228 // nested.
229 // Discard all loops above it added into Worklist.
230 if (Vec->size() != 1) {
231 LoopList = {};
232 return;
233 }
234
235 LoopList.push_back(CurrentLoop);
236 CurrentLoop = Vec->front();
237 Vec = &CurrentLoop->getSubLoops();
238 }
239 LoopList.push_back(CurrentLoop);
240}
241
243 unsigned LoopNestDepth = LoopList.size();
244 if (LoopNestDepth < 2) {
245 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
246 return false;
247 }
248 return true;
249}
250namespace {
251
252/// LoopInterchangeLegality checks if it is legal to interchange the loop.
253class LoopInterchangeLegality {
254public:
255 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
257 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
258
259 /// Check if the loops can be interchanged.
260 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
261 CharMatrix &DepMatrix);
262
263 /// Discover induction PHIs in the header of \p L. Induction
264 /// PHIs are added to \p Inductions.
265 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
266
267 /// Check if the loop structure is understood. We do not handle triangular
268 /// loops for now.
269 bool isLoopStructureUnderstood();
270
271 bool currentLimitations();
272
273 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
274 return OuterInnerReductions;
275 }
276
277 const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
278 return InnerLoopInductions;
279 }
280
281private:
282 bool tightlyNested(Loop *Outer, Loop *Inner);
283 bool containsUnsafeInstructions(BasicBlock *BB);
284
285 /// Discover induction and reduction PHIs in the header of \p L. Induction
286 /// PHIs are added to \p Inductions, reductions are added to
287 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
288 /// to be passed as \p InnerLoop.
289 bool findInductionAndReductions(Loop *L,
290 SmallVector<PHINode *, 8> &Inductions,
291 Loop *InnerLoop);
292
293 Loop *OuterLoop;
294 Loop *InnerLoop;
295
296 ScalarEvolution *SE;
297
298 /// Interface to emit optimization remarks.
300
301 /// Set of reduction PHIs taking part of a reduction across the inner and
302 /// outer loop.
303 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
304
305 /// Set of inner loop induction PHIs
306 SmallVector<PHINode *, 8> InnerLoopInductions;
307};
308
309/// LoopInterchangeProfitability checks if it is profitable to interchange the
310/// loop.
311class LoopInterchangeProfitability {
312public:
313 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
315 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
316
317 /// Check if the loop interchange is profitable.
318 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
319 unsigned InnerLoopId, unsigned OuterLoopId,
320 CharMatrix &DepMatrix,
322 std::unique_ptr<CacheCost> &CC);
323
324private:
325 int getInstrOrderCost();
326 std::optional<bool> isProfitablePerLoopCacheAnalysis(
328 std::unique_ptr<CacheCost> &CC);
329 std::optional<bool> isProfitablePerInstrOrderCost();
330 std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
331 unsigned OuterLoopId,
332 CharMatrix &DepMatrix);
333 Loop *OuterLoop;
334 Loop *InnerLoop;
335
336 /// Scev analysis.
337 ScalarEvolution *SE;
338
339 /// Interface to emit optimization remarks.
341};
342
343/// LoopInterchangeTransform interchanges the loop.
344class LoopInterchangeTransform {
345public:
346 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
347 LoopInfo *LI, DominatorTree *DT,
348 const LoopInterchangeLegality &LIL)
349 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
350
351 /// Interchange OuterLoop and InnerLoop.
352 bool transform();
353 void restructureLoops(Loop *NewInner, Loop *NewOuter,
354 BasicBlock *OrigInnerPreHeader,
355 BasicBlock *OrigOuterPreHeader);
356 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
357
358private:
359 bool adjustLoopLinks();
360 bool adjustLoopBranches();
361
362 Loop *OuterLoop;
363 Loop *InnerLoop;
364
365 /// Scev analysis.
366 ScalarEvolution *SE;
367
368 LoopInfo *LI;
369 DominatorTree *DT;
370
371 const LoopInterchangeLegality &LIL;
372};
373
374struct LoopInterchange {
375 ScalarEvolution *SE = nullptr;
376 LoopInfo *LI = nullptr;
377 DependenceInfo *DI = nullptr;
378 DominatorTree *DT = nullptr;
379 std::unique_ptr<CacheCost> CC = nullptr;
380
381 /// Interface to emit optimization remarks.
383
384 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
385 DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
387 : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
388
389 bool run(Loop *L) {
390 if (L->getParentLoop())
391 return false;
392 SmallVector<Loop *, 8> LoopList;
393 populateWorklist(*L, LoopList);
394 return processLoopList(LoopList);
395 }
396
397 bool run(LoopNest &LN) {
398 SmallVector<Loop *, 8> LoopList(LN.getLoops());
399 for (unsigned I = 1; I < LoopList.size(); ++I)
400 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
401 return false;
402 return processLoopList(LoopList);
403 }
404
405 bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
406 for (Loop *L : LoopList) {
407 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
408 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
409 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
410 return false;
411 }
412 if (L->getNumBackEdges() != 1) {
413 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
414 return false;
415 }
416 if (!L->getExitingBlock()) {
417 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
418 return false;
419 }
420 }
421 return true;
422 }
423
424 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
425 // TODO: Add a better heuristic to select the loop to be interchanged based
426 // on the dependence matrix. Currently we select the innermost loop.
427 return LoopList.size() - 1;
428 }
429
430 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
431 bool Changed = false;
432
433 // Ensure minimum loop nest depth.
434 assert(hasMinimumLoopDepth(LoopList) && "Loop nest does not meet minimum depth.");
435
436 unsigned LoopNestDepth = LoopList.size();
437 if (LoopNestDepth > MaxLoopNestDepth) {
438 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
439 << MaxLoopNestDepth << "\n");
440 return false;
441 }
442 if (!isComputableLoopNest(LoopList)) {
443 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
444 return false;
445 }
446
447 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
448 << "\n");
449
450 CharMatrix DependencyMatrix;
451 Loop *OuterMostLoop = *(LoopList.begin());
452 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
453 OuterMostLoop, DI, SE)) {
454 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
455 return false;
456 }
457
458 LLVM_DEBUG(dbgs() << "Dependency matrix before interchange:\n";
459 printDepMatrix(DependencyMatrix));
460
461 // Get the Outermost loop exit.
462 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
463 if (!LoopNestExit) {
464 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
465 return false;
466 }
467
468 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
469 // Obtain the loop vector returned from loop cache analysis beforehand,
470 // and put each <Loop, index> pair into a map for constant time query
471 // later. Indices in loop vector reprsent the optimal order of the
472 // corresponding loop, e.g., given a loopnest with depth N, index 0
473 // indicates the loop should be placed as the outermost loop and index N
474 // indicates the loop should be placed as the innermost loop.
475 //
476 // For the old pass manager CacheCost would be null.
478 if (CC != nullptr) {
479 const auto &LoopCosts = CC->getLoopCosts();
480 for (unsigned i = 0; i < LoopCosts.size(); i++) {
481 CostMap[LoopCosts[i].first] = i;
482 }
483 }
484 // We try to achieve the globally optimal memory access for the loopnest,
485 // and do interchange based on a bubble-sort fasion. We start from
486 // the innermost loop, move it outwards to the best possible position
487 // and repeat this process.
488 for (unsigned j = SelecLoopId; j > 0; j--) {
489 bool ChangedPerIter = false;
490 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
491 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
492 DependencyMatrix, CostMap);
493 if (!Interchanged)
494 continue;
495 // Loops interchanged, update LoopList accordingly.
496 std::swap(LoopList[i - 1], LoopList[i]);
497 // Update the DependencyMatrix
498 interChangeDependencies(DependencyMatrix, i, i - 1);
499
500 LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
501 printDepMatrix(DependencyMatrix));
502
503 ChangedPerIter |= Interchanged;
504 Changed |= Interchanged;
505 }
506 // Early abort if there was no interchange during an entire round of
507 // moving loops outwards.
508 if (!ChangedPerIter)
509 break;
510 }
511 return Changed;
512 }
513
514 bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
515 unsigned OuterLoopId,
516 std::vector<std::vector<char>> &DependencyMatrix,
517 const DenseMap<const Loop *, unsigned> &CostMap) {
518 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
519 << " and OuterLoopId = " << OuterLoopId << "\n");
520 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
521 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
522 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
523 return false;
524 }
525 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
526 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
527 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
528 DependencyMatrix, CostMap, CC)) {
529 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
530 return false;
531 }
532
533 ORE->emit([&]() {
534 return OptimizationRemark(DEBUG_TYPE, "Interchanged",
535 InnerLoop->getStartLoc(),
536 InnerLoop->getHeader())
537 << "Loop interchanged with enclosing loop.";
538 });
539
540 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
541 LIT.transform();
542 LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
543 LoopsInterchanged++;
544
545 llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE);
546 return true;
547 }
548};
549
550} // end anonymous namespace
551
552bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
553 return any_of(*BB, [](const Instruction &I) {
554 return I.mayHaveSideEffects() || I.mayReadFromMemory();
555 });
556}
557
558bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
559 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
560 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
561 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
562
563 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
564
565 // A perfectly nested loop will not have any branch in between the outer and
566 // inner block i.e. outer header will branch to either inner preheader and
567 // outerloop latch.
568 BranchInst *OuterLoopHeaderBI =
569 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
570 if (!OuterLoopHeaderBI)
571 return false;
572
573 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
574 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
575 Succ != OuterLoopLatch)
576 return false;
577
578 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
579 // We do not have any basic block in between now make sure the outer header
580 // and outer loop latch doesn't contain any unsafe instructions.
581 if (containsUnsafeInstructions(OuterLoopHeader) ||
582 containsUnsafeInstructions(OuterLoopLatch))
583 return false;
584
585 // Also make sure the inner loop preheader does not contain any unsafe
586 // instructions. Note that all instructions in the preheader will be moved to
587 // the outer loop header when interchanging.
588 if (InnerLoopPreHeader != OuterLoopHeader &&
589 containsUnsafeInstructions(InnerLoopPreHeader))
590 return false;
591
592 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
593 // Ensure the inner loop exit block flows to the outer loop latch possibly
594 // through empty blocks.
595 const BasicBlock &SuccInner =
596 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
597 if (&SuccInner != OuterLoopLatch) {
598 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
599 << " does not lead to the outer loop latch.\n";);
600 return false;
601 }
602 // The inner loop exit block does flow to the outer loop latch and not some
603 // other BBs, now make sure it contains safe instructions, since it will be
604 // moved into the (new) inner loop after interchange.
605 if (containsUnsafeInstructions(InnerLoopExit))
606 return false;
607
608 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
609 // We have a perfect loop nest.
610 return true;
611}
612
613bool LoopInterchangeLegality::isLoopStructureUnderstood() {
614 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
615 for (PHINode *InnerInduction : InnerLoopInductions) {
616 unsigned Num = InnerInduction->getNumOperands();
617 for (unsigned i = 0; i < Num; ++i) {
618 Value *Val = InnerInduction->getOperand(i);
619 if (isa<Constant>(Val))
620 continue;
621 Instruction *I = dyn_cast<Instruction>(Val);
622 if (!I)
623 return false;
624 // TODO: Handle triangular loops.
625 // e.g. for(int i=0;i<N;i++)
626 // for(int j=i;j<N;j++)
627 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
628 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
629 InnerLoopPreheader &&
630 !OuterLoop->isLoopInvariant(I)) {
631 return false;
632 }
633 }
634 }
635
636 // TODO: Handle triangular loops of another form.
637 // e.g. for(int i=0;i<N;i++)
638 // for(int j=0;j<i;j++)
639 // or,
640 // for(int i=0;i<N;i++)
641 // for(int j=0;j*i<N;j++)
642 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
643 BranchInst *InnerLoopLatchBI =
644 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
645 if (!InnerLoopLatchBI->isConditional())
646 return false;
647 if (CmpInst *InnerLoopCmp =
648 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
649 Value *Op0 = InnerLoopCmp->getOperand(0);
650 Value *Op1 = InnerLoopCmp->getOperand(1);
651
652 // LHS and RHS of the inner loop exit condition, e.g.,
653 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
654 Value *Left = nullptr;
655 Value *Right = nullptr;
656
657 // Check if V only involves inner loop induction variable.
658 // Return true if V is InnerInduction, or a cast from
659 // InnerInduction, or a binary operator that involves
660 // InnerInduction and a constant.
661 std::function<bool(Value *)> IsPathToInnerIndVar;
662 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
663 if (llvm::is_contained(InnerLoopInductions, V))
664 return true;
665 if (isa<Constant>(V))
666 return true;
667 const Instruction *I = dyn_cast<Instruction>(V);
668 if (!I)
669 return false;
670 if (isa<CastInst>(I))
671 return IsPathToInnerIndVar(I->getOperand(0));
672 if (isa<BinaryOperator>(I))
673 return IsPathToInnerIndVar(I->getOperand(0)) &&
674 IsPathToInnerIndVar(I->getOperand(1));
675 return false;
676 };
677
678 // In case of multiple inner loop indvars, it is okay if LHS and RHS
679 // are both inner indvar related variables.
680 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
681 return true;
682
683 // Otherwise we check if the cmp instruction compares an inner indvar
684 // related variable (Left) with a outer loop invariant (Right).
685 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
686 Left = Op0;
687 Right = Op1;
688 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
689 Left = Op1;
690 Right = Op0;
691 }
692
693 if (Left == nullptr)
694 return false;
695
696 const SCEV *S = SE->getSCEV(Right);
697 if (!SE->isLoopInvariant(S, OuterLoop))
698 return false;
699 }
700
701 return true;
702}
703
704// If SV is a LCSSA PHI node with a single incoming value, return the incoming
705// value.
706static Value *followLCSSA(Value *SV) {
707 PHINode *PHI = dyn_cast<PHINode>(SV);
708 if (!PHI)
709 return SV;
710
711 if (PHI->getNumIncomingValues() != 1)
712 return SV;
713 return followLCSSA(PHI->getIncomingValue(0));
714}
715
716// Check V's users to see if it is involved in a reduction in L.
718 // Reduction variables cannot be constants.
719 if (isa<Constant>(V))
720 return nullptr;
721
722 for (Value *User : V->users()) {
723 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
724 if (PHI->getNumIncomingValues() == 1)
725 continue;
728 // Detect floating point reduction only when it can be reordered.
729 if (RD.getExactFPMathInst() != nullptr)
730 return nullptr;
731 return PHI;
732 }
733 return nullptr;
734 }
735 }
736
737 return nullptr;
738}
739
740bool LoopInterchangeLegality::findInductionAndReductions(
741 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
742 if (!L->getLoopLatch() || !L->getLoopPredecessor())
743 return false;
744 for (PHINode &PHI : L->getHeader()->phis()) {
747 Inductions.push_back(&PHI);
748 else {
749 // PHIs in inner loops need to be part of a reduction in the outer loop,
750 // discovered when checking the PHIs of the outer loop earlier.
751 if (!InnerLoop) {
752 if (!OuterInnerReductions.count(&PHI)) {
753 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
754 "across the outer loop.\n");
755 return false;
756 }
757 } else {
758 assert(PHI.getNumIncomingValues() == 2 &&
759 "Phis in loop header should have exactly 2 incoming values");
760 // Check if we have a PHI node in the outer loop that has a reduction
761 // result from the inner loop as an incoming value.
762 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
763 PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
764 if (!InnerRedPhi ||
765 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
767 dbgs()
768 << "Failed to recognize PHI as an induction or reduction.\n");
769 return false;
770 }
771 OuterInnerReductions.insert(&PHI);
772 OuterInnerReductions.insert(InnerRedPhi);
773 }
774 }
775 }
776 return true;
777}
778
779// This function indicates the current limitations in the transform as a result
780// of which we do not proceed.
781bool LoopInterchangeLegality::currentLimitations() {
782 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
783
784 // transform currently expects the loop latches to also be the exiting
785 // blocks.
786 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
787 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
788 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
789 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
791 dbgs() << "Loops where the latch is not the exiting block are not"
792 << " supported currently.\n");
793 ORE->emit([&]() {
794 return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
795 OuterLoop->getStartLoc(),
796 OuterLoop->getHeader())
797 << "Loops where the latch is not the exiting block cannot be"
798 " interchange currently.";
799 });
800 return true;
801 }
802
803 SmallVector<PHINode *, 8> Inductions;
804 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
806 dbgs() << "Only outer loops with induction or reduction PHI nodes "
807 << "are supported currently.\n");
808 ORE->emit([&]() {
809 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
810 OuterLoop->getStartLoc(),
811 OuterLoop->getHeader())
812 << "Only outer loops with induction or reduction PHI nodes can be"
813 " interchanged currently.";
814 });
815 return true;
816 }
817
818 Inductions.clear();
819 // For multi-level loop nests, make sure that all phi nodes for inner loops
820 // at all levels can be recognized as a induction or reduction phi. Bail out
821 // if a phi node at a certain nesting level cannot be properly recognized.
822 Loop *CurLevelLoop = OuterLoop;
823 while (!CurLevelLoop->getSubLoops().empty()) {
824 // We already made sure that the loop nest is tightly nested.
825 CurLevelLoop = CurLevelLoop->getSubLoops().front();
826 if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
828 dbgs() << "Only inner loops with induction or reduction PHI nodes "
829 << "are supported currently.\n");
830 ORE->emit([&]() {
831 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
832 CurLevelLoop->getStartLoc(),
833 CurLevelLoop->getHeader())
834 << "Only inner loops with induction or reduction PHI nodes can be"
835 " interchange currently.";
836 });
837 return true;
838 }
839 }
840
841 // TODO: Triangular loops are not handled for now.
842 if (!isLoopStructureUnderstood()) {
843 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
844 ORE->emit([&]() {
845 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
846 InnerLoop->getStartLoc(),
847 InnerLoop->getHeader())
848 << "Inner loop structure not understood currently.";
849 });
850 return true;
851 }
852
853 return false;
854}
855
856bool LoopInterchangeLegality::findInductions(
857 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
858 for (PHINode &PHI : L->getHeader()->phis()) {
861 Inductions.push_back(&PHI);
862 }
863 return !Inductions.empty();
864}
865
866// We currently only support LCSSA PHI nodes in the inner loop exit, if their
867// users are either reduction PHIs or PHIs outside the outer loop (which means
868// the we are only interested in the final value after the loop).
869static bool
872 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
873 for (PHINode &PHI : InnerExit->phis()) {
874 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
875 if (PHI.getNumIncomingValues() > 1)
876 return false;
877 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
878 PHINode *PN = dyn_cast<PHINode>(U);
879 return !PN ||
880 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
881 })) {
882 return false;
883 }
884 }
885 return true;
886}
887
888// We currently support LCSSA PHI nodes in the outer loop exit, if their
889// incoming values do not come from the outer loop latch or if the
890// outer loop latch has a single predecessor. In that case, the value will
891// be available if both the inner and outer loop conditions are true, which
892// will still be true after interchanging. If we have multiple predecessor,
893// that may not be the case, e.g. because the outer loop latch may be executed
894// if the inner loop is not executed.
895static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
896 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
897 for (PHINode &PHI : LoopNestExit->phis()) {
898 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
899 Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
900 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
901 continue;
902
903 // The incoming value is defined in the outer loop latch. Currently we
904 // only support that in case the outer loop latch has a single predecessor.
905 // This guarantees that the outer loop latch is executed if and only if
906 // the inner loop is executed (because tightlyNested() guarantees that the
907 // outer loop header only branches to the inner loop or the outer loop
908 // latch).
909 // FIXME: We could weaken this logic and allow multiple predecessors,
910 // if the values are produced outside the loop latch. We would need
911 // additional logic to update the PHI nodes in the exit block as
912 // well.
913 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
914 return false;
915 }
916 }
917 return true;
918}
919
920// In case of multi-level nested loops, it may occur that lcssa phis exist in
921// the latch of InnerLoop, i.e., when defs of the incoming values are further
922// inside the loopnest. Sometimes those incoming values are not available
923// after interchange, since the original inner latch will become the new outer
924// latch which may have predecessor paths that do not include those incoming
925// values.
926// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
927// multi-level loop nests.
928static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
929 if (InnerLoop->getSubLoops().empty())
930 return true;
931 // If the original outer latch has only one predecessor, then values defined
932 // further inside the looploop, e.g., in the innermost loop, will be available
933 // at the new outer latch after interchange.
934 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
935 return true;
936
937 // The outer latch has more than one predecessors, i.e., the inner
938 // exit and the inner header.
939 // PHI nodes in the inner latch are lcssa phis where the incoming values
940 // are defined further inside the loopnest. Check if those phis are used
941 // in the original inner latch. If that is the case then bail out since
942 // those incoming values may not be available at the new outer latch.
943 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
944 for (PHINode &PHI : InnerLoopLatch->phis()) {
945 for (auto *U : PHI.users()) {
946 Instruction *UI = cast<Instruction>(U);
947 if (InnerLoopLatch == UI->getParent())
948 return false;
949 }
950 }
951 return true;
952}
953
954bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
955 unsigned OuterLoopId,
956 CharMatrix &DepMatrix) {
957 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
958 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
959 << " and OuterLoopId = " << OuterLoopId
960 << " due to dependence\n");
961 ORE->emit([&]() {
962 return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
963 InnerLoop->getStartLoc(),
964 InnerLoop->getHeader())
965 << "Cannot interchange loops due to dependences.";
966 });
967 return false;
968 }
969 // Check if outer and inner loop contain legal instructions only.
970 for (auto *BB : OuterLoop->blocks())
972 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
973 // readnone functions do not prevent interchanging.
974 if (CI->onlyWritesMemory())
975 continue;
977 dbgs() << "Loops with call instructions cannot be interchanged "
978 << "safely.");
979 ORE->emit([&]() {
980 return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
981 CI->getDebugLoc(),
982 CI->getParent())
983 << "Cannot interchange loops due to call instruction.";
984 });
985
986 return false;
987 }
988
989 if (!findInductions(InnerLoop, InnerLoopInductions)) {
990 LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n");
991 return false;
992 }
993
994 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
995 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
996 ORE->emit([&]() {
997 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
998 InnerLoop->getStartLoc(),
999 InnerLoop->getHeader())
1000 << "Cannot interchange loops because unsupported PHI nodes found "
1001 "in inner loop latch.";
1002 });
1003 return false;
1004 }
1005
1006 // TODO: The loops could not be interchanged due to current limitations in the
1007 // transform module.
1008 if (currentLimitations()) {
1009 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1010 return false;
1011 }
1012
1013 // Check if the loops are tightly nested.
1014 if (!tightlyNested(OuterLoop, InnerLoop)) {
1015 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1016 ORE->emit([&]() {
1017 return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1018 InnerLoop->getStartLoc(),
1019 InnerLoop->getHeader())
1020 << "Cannot interchange loops because they are not tightly "
1021 "nested.";
1022 });
1023 return false;
1024 }
1025
1026 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1027 OuterInnerReductions)) {
1028 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1029 ORE->emit([&]() {
1030 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1031 InnerLoop->getStartLoc(),
1032 InnerLoop->getHeader())
1033 << "Found unsupported PHI node in loop exit.";
1034 });
1035 return false;
1036 }
1037
1038 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1039 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1040 ORE->emit([&]() {
1041 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1042 OuterLoop->getStartLoc(),
1043 OuterLoop->getHeader())
1044 << "Found unsupported PHI node in loop exit.";
1045 });
1046 return false;
1047 }
1048
1049 return true;
1050}
1051
1052int LoopInterchangeProfitability::getInstrOrderCost() {
1053 unsigned GoodOrder, BadOrder;
1054 BadOrder = GoodOrder = 0;
1055 for (BasicBlock *BB : InnerLoop->blocks()) {
1056 for (Instruction &Ins : *BB) {
1057 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1058 unsigned NumOp = GEP->getNumOperands();
1059 bool FoundInnerInduction = false;
1060 bool FoundOuterInduction = false;
1061 for (unsigned i = 0; i < NumOp; ++i) {
1062 // Skip operands that are not SCEV-able.
1063 if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1064 continue;
1065
1066 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1067 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1068 if (!AR)
1069 continue;
1070
1071 // If we find the inner induction after an outer induction e.g.
1072 // for(int i=0;i<N;i++)
1073 // for(int j=0;j<N;j++)
1074 // A[i][j] = A[i-1][j-1]+k;
1075 // then it is a good order.
1076 if (AR->getLoop() == InnerLoop) {
1077 // We found an InnerLoop induction after OuterLoop induction. It is
1078 // a good order.
1079 FoundInnerInduction = true;
1080 if (FoundOuterInduction) {
1081 GoodOrder++;
1082 break;
1083 }
1084 }
1085 // If we find the outer induction after an inner induction e.g.
1086 // for(int i=0;i<N;i++)
1087 // for(int j=0;j<N;j++)
1088 // A[j][i] = A[j-1][i-1]+k;
1089 // then it is a bad order.
1090 if (AR->getLoop() == OuterLoop) {
1091 // We found an OuterLoop induction after InnerLoop induction. It is
1092 // a bad order.
1093 FoundOuterInduction = true;
1094 if (FoundInnerInduction) {
1095 BadOrder++;
1096 break;
1097 }
1098 }
1099 }
1100 }
1101 }
1102 }
1103 return GoodOrder - BadOrder;
1104}
1105
1106std::optional<bool>
1107LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1108 const DenseMap<const Loop *, unsigned> &CostMap,
1109 std::unique_ptr<CacheCost> &CC) {
1110 // This is the new cost model returned from loop cache analysis.
1111 // A smaller index means the loop should be placed an outer loop, and vice
1112 // versa.
1113 if (CostMap.contains(InnerLoop) && CostMap.contains(OuterLoop)) {
1114 unsigned InnerIndex = 0, OuterIndex = 0;
1115 InnerIndex = CostMap.find(InnerLoop)->second;
1116 OuterIndex = CostMap.find(OuterLoop)->second;
1117 LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
1118 << ", OuterIndex = " << OuterIndex << "\n");
1119 if (InnerIndex < OuterIndex)
1120 return std::optional<bool>(true);
1121 assert(InnerIndex != OuterIndex && "CostMap should assign unique "
1122 "numbers to each loop");
1123 if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop))
1124 return std::nullopt;
1125 return std::optional<bool>(false);
1126 }
1127 return std::nullopt;
1128}
1129
1130std::optional<bool>
1131LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1132 // Legacy cost model: this is rough cost estimation algorithm. It counts the
1133 // good and bad order of induction variables in the instruction and allows
1134 // reordering if number of bad orders is more than good.
1135 int Cost = getInstrOrderCost();
1136 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1138 return std::optional<bool>(true);
1139
1140 return std::nullopt;
1141}
1142
1143std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1144 unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1145 for (auto &Row : DepMatrix) {
1146 // If the inner loop is loop independent or doesn't carry any dependency
1147 // it is not profitable to move this to outer position, since we are
1148 // likely able to do inner loop vectorization already.
1149 if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=')
1150 return std::optional<bool>(false);
1151
1152 // If the outer loop is not loop independent it is not profitable to move
1153 // this to inner position, since doing so would not enable inner loop
1154 // parallelism.
1155 if (Row[OuterLoopId] != 'I' && Row[OuterLoopId] != '=')
1156 return std::optional<bool>(false);
1157 }
1158 // If inner loop has dependence and outer loop is loop independent then it
1159 // is/ profitable to interchange to enable inner loop parallelism.
1160 // If there are no dependences, interchanging will not improve anything.
1161 return std::optional<bool>(!DepMatrix.empty());
1162}
1163
1164bool LoopInterchangeProfitability::isProfitable(
1165 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1166 unsigned OuterLoopId, CharMatrix &DepMatrix,
1167 const DenseMap<const Loop *, unsigned> &CostMap,
1168 std::unique_ptr<CacheCost> &CC) {
1169 // isProfitable() is structured to avoid endless loop interchange.
1170 // If loop cache analysis could decide the profitability then,
1171 // profitability check will stop and return the analysis result.
1172 // If cache analysis failed to analyze the loopnest (e.g.,
1173 // due to delinearization issues) then only check whether it is
1174 // profitable for InstrOrderCost. Likewise, if InstrOrderCost failed to
1175 // analysis the profitability then only, isProfitableForVectorization
1176 // will decide.
1177 std::optional<bool> shouldInterchange =
1178 isProfitablePerLoopCacheAnalysis(CostMap, CC);
1179 if (!shouldInterchange.has_value()) {
1180 shouldInterchange = isProfitablePerInstrOrderCost();
1181 if (!shouldInterchange.has_value())
1182 shouldInterchange =
1183 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1184 }
1185 if (!shouldInterchange.has_value()) {
1186 ORE->emit([&]() {
1187 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1188 InnerLoop->getStartLoc(),
1189 InnerLoop->getHeader())
1190 << "Insufficient information to calculate the cost of loop for "
1191 "interchange.";
1192 });
1193 return false;
1194 } else if (!shouldInterchange.value()) {
1195 ORE->emit([&]() {
1196 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1197 InnerLoop->getStartLoc(),
1198 InnerLoop->getHeader())
1199 << "Interchanging loops is not considered to improve cache "
1200 "locality nor vectorization.";
1201 });
1202 return false;
1203 }
1204 return true;
1205}
1206
1207void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1208 Loop *InnerLoop) {
1209 for (Loop *L : *OuterLoop)
1210 if (L == InnerLoop) {
1211 OuterLoop->removeChildLoop(L);
1212 return;
1213 }
1214 llvm_unreachable("Couldn't find loop");
1215}
1216
1217/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1218/// new inner and outer loop after interchanging: NewInner is the original
1219/// outer loop and NewOuter is the original inner loop.
1220///
1221/// Before interchanging, we have the following structure
1222/// Outer preheader
1223// Outer header
1224// Inner preheader
1225// Inner header
1226// Inner body
1227// Inner latch
1228// outer bbs
1229// Outer latch
1230//
1231// After interchanging:
1232// Inner preheader
1233// Inner header
1234// Outer preheader
1235// Outer header
1236// Inner body
1237// outer bbs
1238// Outer latch
1239// Inner latch
1240void LoopInterchangeTransform::restructureLoops(
1241 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1242 BasicBlock *OrigOuterPreHeader) {
1243 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1244 // The original inner loop preheader moves from the new inner loop to
1245 // the parent loop, if there is one.
1246 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1247 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1248
1249 // Switch the loop levels.
1250 if (OuterLoopParent) {
1251 // Remove the loop from its parent loop.
1252 removeChildLoop(OuterLoopParent, NewInner);
1253 removeChildLoop(NewInner, NewOuter);
1254 OuterLoopParent->addChildLoop(NewOuter);
1255 } else {
1256 removeChildLoop(NewInner, NewOuter);
1257 LI->changeTopLevelLoop(NewInner, NewOuter);
1258 }
1259 while (!NewOuter->isInnermost())
1260 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1261 NewOuter->addChildLoop(NewInner);
1262
1263 // BBs from the original inner loop.
1264 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1265
1266 // Add BBs from the original outer loop to the original inner loop (excluding
1267 // BBs already in inner loop)
1268 for (BasicBlock *BB : NewInner->blocks())
1269 if (LI->getLoopFor(BB) == NewInner)
1270 NewOuter->addBlockEntry(BB);
1271
1272 // Now remove inner loop header and latch from the new inner loop and move
1273 // other BBs (the loop body) to the new inner loop.
1274 BasicBlock *OuterHeader = NewOuter->getHeader();
1275 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1276 for (BasicBlock *BB : OrigInnerBBs) {
1277 // Nothing will change for BBs in child loops.
1278 if (LI->getLoopFor(BB) != NewOuter)
1279 continue;
1280 // Remove the new outer loop header and latch from the new inner loop.
1281 if (BB == OuterHeader || BB == OuterLatch)
1282 NewInner->removeBlockFromLoop(BB);
1283 else
1284 LI->changeLoopFor(BB, NewInner);
1285 }
1286
1287 // The preheader of the original outer loop becomes part of the new
1288 // outer loop.
1289 NewOuter->addBlockEntry(OrigOuterPreHeader);
1290 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1291
1292 // Tell SE that we move the loops around.
1293 SE->forgetLoop(NewOuter);
1294}
1295
1296bool LoopInterchangeTransform::transform() {
1297 bool Transformed = false;
1298
1299 if (InnerLoop->getSubLoops().empty()) {
1300 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1301 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1302 auto &InductionPHIs = LIL.getInnerLoopInductions();
1303 if (InductionPHIs.empty()) {
1304 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1305 return false;
1306 }
1307
1308 SmallVector<Instruction *, 8> InnerIndexVarList;
1309 for (PHINode *CurInductionPHI : InductionPHIs) {
1310 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1311 InnerIndexVarList.push_back(
1312 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1313 else
1314 InnerIndexVarList.push_back(
1315 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1316 }
1317
1318 // Create a new latch block for the inner loop. We split at the
1319 // current latch's terminator and then move the condition and all
1320 // operands that are not either loop-invariant or the induction PHI into the
1321 // new latch block.
1322 BasicBlock *NewLatch =
1323 SplitBlock(InnerLoop->getLoopLatch(),
1324 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1325
1327 unsigned i = 0;
1328 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1329 for (; i < WorkList.size(); i++) {
1330 // Duplicate instruction and move it the new latch. Update uses that
1331 // have been moved.
1332 Instruction *NewI = WorkList[i]->clone();
1333 NewI->insertBefore(NewLatch->getFirstNonPHI());
1334 assert(!NewI->mayHaveSideEffects() &&
1335 "Moving instructions with side-effects may change behavior of "
1336 "the loop nest!");
1337 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1338 Instruction *UserI = cast<Instruction>(U.getUser());
1339 if (!InnerLoop->contains(UserI->getParent()) ||
1340 UserI->getParent() == NewLatch ||
1341 llvm::is_contained(InductionPHIs, UserI))
1342 U.set(NewI);
1343 }
1344 // Add operands of moved instruction to the worklist, except if they are
1345 // outside the inner loop or are the induction PHI.
1346 for (Value *Op : WorkList[i]->operands()) {
1347 Instruction *OpI = dyn_cast<Instruction>(Op);
1348 if (!OpI ||
1349 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1350 llvm::is_contained(InductionPHIs, OpI))
1351 continue;
1352 WorkList.insert(OpI);
1353 }
1354 }
1355 };
1356
1357 // FIXME: Should we interchange when we have a constant condition?
1358 Instruction *CondI = dyn_cast<Instruction>(
1359 cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1360 ->getCondition());
1361 if (CondI)
1362 WorkList.insert(CondI);
1363 MoveInstructions();
1364 for (Instruction *InnerIndexVar : InnerIndexVarList)
1365 WorkList.insert(cast<Instruction>(InnerIndexVar));
1366 MoveInstructions();
1367 }
1368
1369 // Ensure the inner loop phi nodes have a separate basic block.
1370 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1371 if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) {
1372 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1373 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1374 }
1375
1376 // Instructions in the original inner loop preheader may depend on values
1377 // defined in the outer loop header. Move them there, because the original
1378 // inner loop preheader will become the entry into the interchanged loop nest.
1379 // Currently we move all instructions and rely on LICM to move invariant
1380 // instructions outside the loop nest.
1381 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1382 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1383 if (InnerLoopPreHeader != OuterLoopHeader) {
1385 for (Instruction &I :
1386 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1387 std::prev(InnerLoopPreHeader->end()))))
1388 I.moveBeforePreserving(OuterLoopHeader->getTerminator());
1389 }
1390
1391 Transformed |= adjustLoopLinks();
1392 if (!Transformed) {
1393 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1394 return false;
1395 }
1396
1397 return true;
1398}
1399
1400/// \brief Move all instructions except the terminator from FromBB right before
1401/// InsertBefore
1402static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1403 BasicBlock *ToBB = InsertBefore->getParent();
1404
1405 ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(),
1406 FromBB->getTerminator()->getIterator());
1407}
1408
1409/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1410static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1411 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1412 // from BB1 afterwards.
1413 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1414 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1415 for (Instruction *I : TempInstrs)
1416 I->removeFromParent();
1417
1418 // Move instructions from BB2 to BB1.
1419 moveBBContents(BB2, BB1->getTerminator());
1420
1421 // Move instructions from TempInstrs to BB2.
1422 for (Instruction *I : TempInstrs)
1423 I->insertBefore(BB2->getTerminator());
1424}
1425
1426// Update BI to jump to NewBB instead of OldBB. Records updates to the
1427// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1428// \p OldBB is exactly once in BI's successor list.
1429static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1430 BasicBlock *NewBB,
1431 std::vector<DominatorTree::UpdateType> &DTUpdates,
1432 bool MustUpdateOnce = true) {
1433 assert((!MustUpdateOnce ||
1435 [OldBB](BasicBlock *BB) {
1436 return BB == OldBB;
1437 }) == 1) && "BI must jump to OldBB exactly once.");
1438 bool Changed = false;
1439 for (Use &Op : BI->operands())
1440 if (Op == OldBB) {
1441 Op.set(NewBB);
1442 Changed = true;
1443 }
1444
1445 if (Changed) {
1446 DTUpdates.push_back(
1447 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1448 DTUpdates.push_back(
1449 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1450 }
1451 assert(Changed && "Expected a successor to be updated");
1452}
1453
1454// Move Lcssa PHIs to the right place.
1455static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1456 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1457 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1458 Loop *InnerLoop, LoopInfo *LI) {
1459
1460 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1461 // defined either in the header or latch. Those blocks will become header and
1462 // latch of the new outer loop, and the only possible users can PHI nodes
1463 // in the exit block of the loop nest or the outer loop header (reduction
1464 // PHIs, in that case, the incoming value must be defined in the inner loop
1465 // header). We can just substitute the user with the incoming value and remove
1466 // the PHI.
1467 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1468 assert(P.getNumIncomingValues() == 1 &&
1469 "Only loops with a single exit are supported!");
1470
1471 // Incoming values are guaranteed be instructions currently.
1472 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1473 // In case of multi-level nested loops, follow LCSSA to find the incoming
1474 // value defined from the innermost loop.
1475 auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1476 // Skip phis with incoming values from the inner loop body, excluding the
1477 // header and latch.
1478 if (IncIInnerMost->getParent() != InnerLatch &&
1479 IncIInnerMost->getParent() != InnerHeader)
1480 continue;
1481
1482 assert(all_of(P.users(),
1483 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1484 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1485 IncI->getParent() == InnerHeader) ||
1486 cast<PHINode>(U)->getParent() == OuterExit;
1487 }) &&
1488 "Can only replace phis iff the uses are in the loop nest exit or "
1489 "the incoming value is defined in the inner header (it will "
1490 "dominate all loop blocks after interchanging)");
1491 P.replaceAllUsesWith(IncI);
1492 P.eraseFromParent();
1493 }
1494
1495 SmallVector<PHINode *, 8> LcssaInnerExit;
1496 for (PHINode &P : InnerExit->phis())
1497 LcssaInnerExit.push_back(&P);
1498
1499 SmallVector<PHINode *, 8> LcssaInnerLatch;
1500 for (PHINode &P : InnerLatch->phis())
1501 LcssaInnerLatch.push_back(&P);
1502
1503 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1504 // If a PHI node has users outside of InnerExit, it has a use outside the
1505 // interchanged loop and we have to preserve it. We move these to
1506 // InnerLatch, which will become the new exit block for the innermost
1507 // loop after interchanging.
1508 for (PHINode *P : LcssaInnerExit)
1509 P->moveBefore(InnerLatch->getFirstNonPHI());
1510
1511 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1512 // and we have to move them to the new inner latch.
1513 for (PHINode *P : LcssaInnerLatch)
1514 P->moveBefore(InnerExit->getFirstNonPHI());
1515
1516 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1517 // incoming values defined in the outer loop, we have to add a new PHI
1518 // in the inner loop latch, which became the exit block of the outer loop,
1519 // after interchanging.
1520 if (OuterExit) {
1521 for (PHINode &P : OuterExit->phis()) {
1522 if (P.getNumIncomingValues() != 1)
1523 continue;
1524 // Skip Phis with incoming values defined in the inner loop. Those should
1525 // already have been updated.
1526 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1527 if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1528 continue;
1529
1530 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1531 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1532 NewPhi->setIncomingBlock(0, OuterLatch);
1533 // We might have incoming edges from other BBs, i.e., the original outer
1534 // header.
1535 for (auto *Pred : predecessors(InnerLatch)) {
1536 if (Pred == OuterLatch)
1537 continue;
1538 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1539 }
1540 NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1541 P.setIncomingValue(0, NewPhi);
1542 }
1543 }
1544
1545 // Now adjust the incoming blocks for the LCSSA PHIs.
1546 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1547 // with the new latch.
1548 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1549}
1550
1551bool LoopInterchangeTransform::adjustLoopBranches() {
1552 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1553 std::vector<DominatorTree::UpdateType> DTUpdates;
1554
1555 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1556 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1557
1558 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1559 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1560 InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1561 // Ensure that both preheaders do not contain PHI nodes and have single
1562 // predecessors. This allows us to move them easily. We use
1563 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1564 // preheaders do not satisfy those conditions.
1565 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1566 !OuterLoopPreHeader->getUniquePredecessor())
1567 OuterLoopPreHeader =
1568 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1569 if (InnerLoopPreHeader == OuterLoop->getHeader())
1570 InnerLoopPreHeader =
1571 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1572
1573 // Adjust the loop preheader
1574 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1575 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1576 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1577 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1578 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1579 BasicBlock *InnerLoopLatchPredecessor =
1580 InnerLoopLatch->getUniquePredecessor();
1581 BasicBlock *InnerLoopLatchSuccessor;
1582 BasicBlock *OuterLoopLatchSuccessor;
1583
1584 BranchInst *OuterLoopLatchBI =
1585 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1586 BranchInst *InnerLoopLatchBI =
1587 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1588 BranchInst *OuterLoopHeaderBI =
1589 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1590 BranchInst *InnerLoopHeaderBI =
1591 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1592
1593 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1594 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1595 !InnerLoopHeaderBI)
1596 return false;
1597
1598 BranchInst *InnerLoopLatchPredecessorBI =
1599 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1600 BranchInst *OuterLoopPredecessorBI =
1601 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1602
1603 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1604 return false;
1605 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1606 if (!InnerLoopHeaderSuccessor)
1607 return false;
1608
1609 // Adjust Loop Preheader and headers.
1610 // The branches in the outer loop predecessor and the outer loop header can
1611 // be unconditional branches or conditional branches with duplicates. Consider
1612 // this when updating the successors.
1613 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1614 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1615 // The outer loop header might or might not branch to the outer latch.
1616 // We are guaranteed to branch to the inner loop preheader.
1617 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1618 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1619 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1620 DTUpdates,
1621 /*MustUpdateOnce=*/false);
1622 }
1623 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1624 InnerLoopHeaderSuccessor, DTUpdates,
1625 /*MustUpdateOnce=*/false);
1626
1627 // Adjust reduction PHI's now that the incoming block has changed.
1628 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1629 OuterLoopHeader);
1630
1631 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1632 OuterLoopPreHeader, DTUpdates);
1633
1634 // -------------Adjust loop latches-----------
1635 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1636 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1637 else
1638 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1639
1640 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1641 InnerLoopLatchSuccessor, DTUpdates);
1642
1643 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1644 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1645 else
1646 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1647
1648 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1649 OuterLoopLatchSuccessor, DTUpdates);
1650 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1651 DTUpdates);
1652
1653 DT->applyUpdates(DTUpdates);
1654 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1655 OuterLoopPreHeader);
1656
1657 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1658 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1659 InnerLoop, LI);
1660 // For PHIs in the exit block of the outer loop, outer's latch has been
1661 // replaced by Inners'.
1662 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1663
1664 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1665 // Now update the reduction PHIs in the inner and outer loop headers.
1666 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1667 for (PHINode &PHI : InnerLoopHeader->phis())
1668 if (OuterInnerReductions.contains(&PHI))
1669 InnerLoopPHIs.push_back(&PHI);
1670
1671 for (PHINode &PHI : OuterLoopHeader->phis())
1672 if (OuterInnerReductions.contains(&PHI))
1673 OuterLoopPHIs.push_back(&PHI);
1674
1675 // Now move the remaining reduction PHIs from outer to inner loop header and
1676 // vice versa. The PHI nodes must be part of a reduction across the inner and
1677 // outer loop and all the remains to do is and updating the incoming blocks.
1678 for (PHINode *PHI : OuterLoopPHIs) {
1679 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
1680 PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1681 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1682 }
1683 for (PHINode *PHI : InnerLoopPHIs) {
1684 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
1685 PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1686 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1687 }
1688
1689 // Update the incoming blocks for moved PHI nodes.
1690 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1691 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1692 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1693 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1694
1695 // Values defined in the outer loop header could be used in the inner loop
1696 // latch. In that case, we need to create LCSSA phis for them, because after
1697 // interchanging they will be defined in the new inner loop and used in the
1698 // new outer loop.
1699 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1700 for (Instruction &I :
1701 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1702 MayNeedLCSSAPhis.push_back(&I);
1703 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE);
1704
1705 return true;
1706}
1707
1708bool LoopInterchangeTransform::adjustLoopLinks() {
1709 // Adjust all branches in the inner and outer loop.
1710 bool Changed = adjustLoopBranches();
1711 if (Changed) {
1712 // We have interchanged the preheaders so we need to interchange the data in
1713 // the preheaders as well. This is because the content of the inner
1714 // preheader was previously executed inside the outer loop.
1715 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1716 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1717 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1718 }
1719 return Changed;
1720}
1721
1725 LPMUpdater &U) {
1726 Function &F = *LN.getParent();
1727 SmallVector<Loop *, 8> LoopList(LN.getLoops());
1728 // Ensure minimum depth of the loop nest to do the interchange.
1729 if (!hasMinimumLoopDepth(LoopList))
1730 return PreservedAnalyses::all();
1731
1732 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1733 std::unique_ptr<CacheCost> CC =
1736 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
1737 return PreservedAnalyses::all();
1738 U.markLoopNestChanged(true);
1740}
Rewrite undef for PHI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
#define LLVM_DEBUG(...)
Definition: Debug.h:106
Hexagon Common GEP
This file defines the interface for the loop cache analysis.
static bool hasMinimumLoopDepth(SmallVectorImpl< Loop * > &LoopList)
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static const unsigned MaxLoopNestDepth
static void printDepMatrix(CharMatrix &DepMatrix)
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
#define DEBUG_TYPE
static const unsigned MaxMemInstrCount
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static bool isLexicographicallyPositive(std::vector< char > &DV)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static bool isProfitable(const SmallVector< std::unique_ptr< StableFunctionMap::StableFunctionEntry > > &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
StringSet - A set-like wrapper for the StringMap.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:250
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:367
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:497
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: BasicBlock.cpp:651
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:467
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:631
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
DependenceInfo - This class is the main dependence-analysis driver.
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
iterator begin() const
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:632
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
This node represents a polynomial recurrence on the trip count of the specified loop.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:288
LLVM Value Representation.
Definition: Value.h:74
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
auto successors(const MachineBasicBlock *BB)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:465
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1873
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:325
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:215