LLVM 22.0.0git
LoopInterchange.cpp
Go to the documentation of this file.
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallSet.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/ADT/StringMap.h"
21#include "llvm/ADT/StringRef.h"
30#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Dominators.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
37#include "llvm/IR/User.h"
38#include "llvm/IR/Value.h"
41#include "llvm/Support/Debug.h"
47#include <cassert>
48#include <utility>
49#include <vector>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "loop-interchange"
54
55STATISTIC(LoopsInterchanged, "Number of loops interchanged");
56
58 "loop-interchange-threshold", cl::init(0), cl::Hidden,
59 cl::desc("Interchange if you gain more than this number"));
60
61// Maximum number of load-stores that can be handled in the dependency matrix.
63 "loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden,
65 "Maximum number of load-store instructions that should be handled "
66 "in the dependency matrix. Higher value may lead to more interchanges "
67 "at the cost of compile-time"));
68
69namespace {
70
71using LoopVector = SmallVector<Loop *, 8>;
72
73/// A list of direction vectors. Each entry represents a direction vector
74/// corresponding to one or more dependencies existing in the loop nest. The
75/// length of all direction vectors is equal and is N + 1, where N is the depth
76/// of the loop nest. The first N elements correspond to the dependency
77/// direction of each N loops. The last one indicates whether this entry is
78/// forward dependency ('<') or not ('*'). The term "forward" aligns with what
79/// is defined in LoopAccessAnalysis.
80// TODO: Check if we can use a sparse matrix here.
81using CharMatrix = std::vector<std::vector<char>>;
82
83/// Types of rules used in profitability check.
84enum class RuleTy {
85 PerLoopCacheAnalysis,
86 PerInstrOrderCost,
87 ForVectorization,
88 Ignore
89};
90
91} // end anonymous namespace
92
93// Minimum loop depth supported.
95 "loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden,
96 cl::desc("Minimum depth of loop nest considered for the transform"));
97
98// Maximum loop depth supported.
100 "loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
101 cl::desc("Maximum depth of loop nest considered for the transform"));
102
103// We prefer cache cost to vectorization by default.
105 "loop-interchange-profitabilities", cl::ZeroOrMore,
107 cl::desc("List of profitability heuristics to be used. They are applied in "
108 "the given order"),
109 cl::list_init<RuleTy>({RuleTy::PerLoopCacheAnalysis,
110 RuleTy::PerInstrOrderCost,
111 RuleTy::ForVectorization}),
112 cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache",
113 "Prioritize loop cache cost"),
114 clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
115 "Prioritize the IVs order of each instruction"),
116 clEnumValN(RuleTy::ForVectorization, "vectorize",
117 "Prioritize vectorization"),
118 clEnumValN(RuleTy::Ignore, "ignore",
119 "Ignore profitability, force interchange (does not "
120 "work with other options)")));
121
122#ifndef NDEBUG
125 for (RuleTy Rule : Rules) {
126 if (!Set.insert(Rule).second)
127 return false;
128 if (Rule == RuleTy::Ignore)
129 return false;
130 }
131 return true;
132}
133
134static void printDepMatrix(CharMatrix &DepMatrix) {
135 for (auto &Row : DepMatrix) {
136 // Drop the last element because it is a flag indicating whether this is
137 // forward dependency or not, which doesn't affect the legality check.
138 for (char D : drop_end(Row))
139 LLVM_DEBUG(dbgs() << D << " ");
140 LLVM_DEBUG(dbgs() << "\n");
141 }
142}
143
144/// Return true if \p Src appears before \p Dst in the same basic block.
145/// Precondition: \p Src and \Dst are distinct instructions within the same
146/// basic block.
147static bool inThisOrder(const Instruction *Src, const Instruction *Dst) {
148 assert(Src->getParent() == Dst->getParent() && Src != Dst &&
149 "Expected Src and Dst to be different instructions in the same BB");
150
151 bool FoundSrc = false;
152 for (const Instruction &I : *(Src->getParent())) {
153 if (&I == Src) {
154 FoundSrc = true;
155 continue;
156 }
157 if (&I == Dst)
158 return FoundSrc;
159 }
160
161 llvm_unreachable("Dst not found");
162}
163#endif
164
165static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
166 Loop *L, DependenceInfo *DI,
167 ScalarEvolution *SE,
169 using ValueVector = SmallVector<Value *, 16>;
170
171 ValueVector MemInstr;
172
173 // For each block.
174 for (BasicBlock *BB : L->blocks()) {
175 // Scan the BB and collect legal loads and stores.
176 for (Instruction &I : *BB) {
177 if (!isa<Instruction>(I))
178 return false;
179 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
180 if (!Ld->isSimple())
181 return false;
182 MemInstr.push_back(&I);
183 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
184 if (!St->isSimple())
185 return false;
186 MemInstr.push_back(&I);
187 }
188 }
189 }
190
191 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
192 << " Loads and Stores to analyze\n");
193 if (MemInstr.size() > MaxMemInstrCount) {
194 LLVM_DEBUG(dbgs() << "The transform doesn't support more than "
195 << MaxMemInstrCount << " load/stores in a loop\n");
196 ORE->emit([&]() {
197 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoop",
198 L->getStartLoc(), L->getHeader())
199 << "Number of loads/stores exceeded, the supported maximum "
200 "can be increased with option "
201 "-loop-interchange-maxmeminstr-count.";
202 });
203 return false;
204 }
205 ValueVector::iterator I, IE, J, JE;
206
207 // Manage direction vectors that are already seen. Map each direction vector
208 // to an index of DepMatrix at which it is stored.
210
211 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
212 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
213 std::vector<char> Dep;
216 // Ignore Input dependencies.
217 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
218 continue;
219 // Track Output, Flow, and Anti dependencies.
220 if (auto D = DI->depends(Src, Dst)) {
221 assert(D->isOrdered() && "Expected an output, flow or anti dep.");
222 // If the direction vector is negative, normalize it to
223 // make it non-negative.
224 if (D->normalize(SE))
225 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
226 LLVM_DEBUG(StringRef DepType =
227 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
228 dbgs() << "Found " << DepType
229 << " dependency between Src and Dst\n"
230 << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
231 unsigned Levels = D->getLevels();
232 char Direction;
233 for (unsigned II = 1; II <= Levels; ++II) {
234 // `DVEntry::LE` is converted to `*`. This is because `LE` means `<`
235 // or `=`, for which we don't have an equivalent representation, so
236 // that the conservative approximation is necessary. The same goes for
237 // `DVEntry::GE`.
238 // TODO: Use of fine-grained expressions allows for more accurate
239 // analysis.
240 unsigned Dir = D->getDirection(II);
241 if (Dir == Dependence::DVEntry::LT)
242 Direction = '<';
243 else if (Dir == Dependence::DVEntry::GT)
244 Direction = '>';
245 else if (Dir == Dependence::DVEntry::EQ)
246 Direction = '=';
247 else
248 Direction = '*';
249 Dep.push_back(Direction);
250 }
251
252 // If the Dependence object doesn't have any information, fill the
253 // dependency vector with '*'.
254 if (D->isConfused()) {
255 assert(Dep.empty() && "Expected empty dependency vector");
256 Dep.assign(Level, '*');
257 }
258
259 while (Dep.size() != Level) {
260 Dep.push_back('I');
261 }
262
263 // If all the elements of any direction vector have only '*', legality
264 // can't be proven. Exit early to save compile time.
265 if (all_of(Dep, [](char C) { return C == '*'; })) {
266 ORE->emit([&]() {
267 return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
268 L->getStartLoc(), L->getHeader())
269 << "All loops have dependencies in all directions.";
270 });
271 return false;
272 }
273
274 // Test whether the dependency is forward or not.
275 bool IsKnownForward = true;
276 if (Src->getParent() != Dst->getParent()) {
277 // In general, when Src and Dst are in different BBs, the execution
278 // order of them within a single iteration is not guaranteed. Treat
279 // conservatively as not-forward dependency in this case.
280 IsKnownForward = false;
281 } else {
282 // Src and Dst are in the same BB. If they are the different
283 // instructions, Src should appear before Dst in the BB as they are
284 // stored to MemInstr in that order.
285 assert((Src == Dst || inThisOrder(Src, Dst)) &&
286 "Unexpected instructions");
287
288 // If the Dependence object is reversed (due to normalization), it
289 // represents the dependency from Dst to Src, meaning it is a backward
290 // dependency. Otherwise it should be a forward dependency.
291 bool IsReversed = D->getSrc() != Src;
292 if (IsReversed)
293 IsKnownForward = false;
294 }
295
296 // Initialize the last element. Assume forward dependencies only; it
297 // will be updated later if there is any non-forward dependency.
298 Dep.push_back('<');
299
300 // The last element should express the "summary" among one or more
301 // direction vectors whose first N elements are the same (where N is
302 // the depth of the loop nest). Hence we exclude the last element from
303 // the Seen map.
304 auto [Ite, Inserted] = Seen.try_emplace(
305 StringRef(Dep.data(), Dep.size() - 1), DepMatrix.size());
306
307 // Make sure we only add unique entries to the dependency matrix.
308 if (Inserted)
309 DepMatrix.push_back(Dep);
310
311 // If we cannot prove that this dependency is forward, change the last
312 // element of the corresponding entry. Since a `[... *]` dependency
313 // includes a `[... <]` dependency, we do not need to keep both and
314 // change the existing entry instead.
315 if (!IsKnownForward)
316 DepMatrix[Ite->second].back() = '*';
317 }
318 }
319 }
320
321 return true;
322}
323
324// A loop is moved from index 'from' to an index 'to'. Update the Dependence
325// matrix by exchanging the two columns.
326static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
327 unsigned ToIndx) {
328 for (auto &Row : DepMatrix)
329 std::swap(Row[ToIndx], Row[FromIndx]);
330}
331
332// Check if a direction vector is lexicographically positive. Return true if it
333// is positive, nullopt if it is "zero", otherwise false.
334// [Theorem] A permutation of the loops in a perfect nest is legal if and only
335// if the direction matrix, after the same permutation is applied to its
336// columns, has no ">" direction as the leftmost non-"=" direction in any row.
337static std::optional<bool>
338isLexicographicallyPositive(ArrayRef<char> DV, unsigned Begin, unsigned End) {
339 for (unsigned char Direction : DV.slice(Begin, End - Begin)) {
340 if (Direction == '<')
341 return true;
342 if (Direction == '>' || Direction == '*')
343 return false;
344 }
345 return std::nullopt;
346}
347
348// Checks if it is legal to interchange 2 loops.
349static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
350 unsigned InnerLoopId,
351 unsigned OuterLoopId) {
352 unsigned NumRows = DepMatrix.size();
353 std::vector<char> Cur;
354 // For each row check if it is valid to interchange.
355 for (unsigned Row = 0; Row < NumRows; ++Row) {
356 // Create temporary DepVector check its lexicographical order
357 // before and after swapping OuterLoop vs InnerLoop
358 Cur = DepMatrix[Row];
359
360 // If the surrounding loops already ensure that the direction vector is
361 // lexicographically positive, nothing within the loop will be able to break
362 // the dependence. In such a case we can skip the subsequent check.
363 if (isLexicographicallyPositive(Cur, 0, OuterLoopId) == true)
364 continue;
365
366 // Check if the direction vector is lexicographically positive (or zero)
367 // for both before/after exchanged. Ignore the last element because it
368 // doesn't affect the legality.
369 if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size() - 1) == false)
370 return false;
371 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
372 if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size() - 1) == false)
373 return false;
374 }
375 return true;
376}
377
378static void populateWorklist(Loop &L, LoopVector &LoopList) {
379 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
380 << L.getHeader()->getParent()->getName() << " Loop: %"
381 << L.getHeader()->getName() << '\n');
382 assert(LoopList.empty() && "LoopList should initially be empty!");
383 Loop *CurrentLoop = &L;
384 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
385 while (!Vec->empty()) {
386 // The current loop has multiple subloops in it hence it is not tightly
387 // nested.
388 // Discard all loops above it added into Worklist.
389 if (Vec->size() != 1) {
390 LoopList = {};
391 return;
392 }
393
394 LoopList.push_back(CurrentLoop);
395 CurrentLoop = Vec->front();
396 Vec = &CurrentLoop->getSubLoops();
397 }
398 LoopList.push_back(CurrentLoop);
399}
400
403 unsigned LoopNestDepth = LoopList.size();
404 if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
405 LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
406 << ", the supported range is [" << MinLoopNestDepth
407 << ", " << MaxLoopNestDepth << "].\n");
408 Loop *OuterLoop = LoopList.front();
409 ORE.emit([&]() {
410 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoopNestDepth",
411 OuterLoop->getStartLoc(),
412 OuterLoop->getHeader())
413 << "Unsupported depth of loop nest, the supported range is ["
414 << std::to_string(MinLoopNestDepth) << ", "
415 << std::to_string(MaxLoopNestDepth) << "].\n";
416 });
417 return false;
418 }
419 return true;
420}
421
423 ArrayRef<Loop *> LoopList) {
424 for (Loop *L : LoopList) {
425 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
426 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
427 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
428 return false;
429 }
430 if (L->getNumBackEdges() != 1) {
431 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
432 return false;
433 }
434 if (!L->getExitingBlock()) {
435 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
436 return false;
437 }
438 }
439 return true;
440}
441
442namespace {
443
444/// LoopInterchangeLegality checks if it is legal to interchange the loop.
445class LoopInterchangeLegality {
446public:
447 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
448 OptimizationRemarkEmitter *ORE)
449 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
450
451 /// Check if the loops can be interchanged.
452 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
453 CharMatrix &DepMatrix);
454
455 /// Discover induction PHIs in the header of \p L. Induction
456 /// PHIs are added to \p Inductions.
457 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
458
459 /// Check if the loop structure is understood. We do not handle triangular
460 /// loops for now.
461 bool isLoopStructureUnderstood();
462
463 bool currentLimitations();
464
465 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
466 return OuterInnerReductions;
467 }
468
469 const ArrayRef<PHINode *> getInnerLoopInductions() const {
470 return InnerLoopInductions;
471 }
472
473 ArrayRef<Instruction *> getHasNoWrapReductions() const {
474 return HasNoWrapReductions;
475 }
476
477private:
478 bool tightlyNested(Loop *Outer, Loop *Inner);
479 bool containsUnsafeInstructions(BasicBlock *BB);
480
481 /// Discover induction and reduction PHIs in the header of \p L. Induction
482 /// PHIs are added to \p Inductions, reductions are added to
483 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
484 /// to be passed as \p InnerLoop.
485 bool findInductionAndReductions(Loop *L,
486 SmallVector<PHINode *, 8> &Inductions,
487 Loop *InnerLoop);
488
489 Loop *OuterLoop;
490 Loop *InnerLoop;
491
492 ScalarEvolution *SE;
493
494 /// Interface to emit optimization remarks.
495 OptimizationRemarkEmitter *ORE;
496
497 /// Set of reduction PHIs taking part of a reduction across the inner and
498 /// outer loop.
499 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
500
501 /// Set of inner loop induction PHIs
502 SmallVector<PHINode *, 8> InnerLoopInductions;
503
504 /// Hold instructions that have nuw/nsw flags and involved in reductions,
505 /// like integer addition/multiplication. Those flags must be dropped when
506 /// interchanging the loops.
507 SmallVector<Instruction *, 4> HasNoWrapReductions;
508};
509
510/// Manages information utilized by the profitability check for cache. The main
511/// purpose of this class is to delay the computation of CacheCost until it is
512/// actually needed.
513class CacheCostManager {
514 Loop *OutermostLoop;
515 LoopStandardAnalysisResults *AR;
516 DependenceInfo *DI;
517
518 /// CacheCost for \ref OutermostLoop. Once it is computed, it is cached. Note
519 /// that the result can be nullptr.
520 std::optional<std::unique_ptr<CacheCost>> CC;
521
522 /// Maps each loop to an index representing the optimal position within the
523 /// loop-nest, as determined by the cache cost analysis.
524 DenseMap<const Loop *, unsigned> CostMap;
525
526 void computeIfUnitinialized();
527
528public:
529 CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
530 DependenceInfo *DI)
531 : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
532 CacheCost *getCacheCost();
533 const DenseMap<const Loop *, unsigned> &getCostMap();
534};
535
536/// LoopInterchangeProfitability checks if it is profitable to interchange the
537/// loop.
538class LoopInterchangeProfitability {
539public:
540 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
541 OptimizationRemarkEmitter *ORE)
542 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
543
544 /// Check if the loop interchange is profitable.
545 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
546 unsigned InnerLoopId, unsigned OuterLoopId,
547 CharMatrix &DepMatrix, CacheCostManager &CCM);
548
549private:
550 int getInstrOrderCost();
551 std::optional<bool> isProfitablePerLoopCacheAnalysis(
552 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
553 std::optional<bool> isProfitablePerInstrOrderCost();
554 std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
555 unsigned OuterLoopId,
556 CharMatrix &DepMatrix);
557 Loop *OuterLoop;
558 Loop *InnerLoop;
559
560 /// Scev analysis.
561 ScalarEvolution *SE;
562
563 /// Interface to emit optimization remarks.
564 OptimizationRemarkEmitter *ORE;
565};
566
567/// LoopInterchangeTransform interchanges the loop.
568class LoopInterchangeTransform {
569public:
570 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
571 LoopInfo *LI, DominatorTree *DT,
572 const LoopInterchangeLegality &LIL)
573 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
574
575 /// Interchange OuterLoop and InnerLoop.
576 bool transform(ArrayRef<Instruction *> DropNoWrapInsts);
577 void restructureLoops(Loop *NewInner, Loop *NewOuter,
578 BasicBlock *OrigInnerPreHeader,
579 BasicBlock *OrigOuterPreHeader);
580 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
581
582private:
583 bool adjustLoopLinks();
584 bool adjustLoopBranches();
585
586 Loop *OuterLoop;
587 Loop *InnerLoop;
588
589 /// Scev analysis.
590 ScalarEvolution *SE;
591
592 LoopInfo *LI;
593 DominatorTree *DT;
594
595 const LoopInterchangeLegality &LIL;
596};
597
598struct LoopInterchange {
599 ScalarEvolution *SE = nullptr;
600 LoopInfo *LI = nullptr;
601 DependenceInfo *DI = nullptr;
602 DominatorTree *DT = nullptr;
603 LoopStandardAnalysisResults *AR = nullptr;
604
605 /// Interface to emit optimization remarks.
606 OptimizationRemarkEmitter *ORE;
607
608 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
609 DominatorTree *DT, LoopStandardAnalysisResults *AR,
610 OptimizationRemarkEmitter *ORE)
611 : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
612
613 bool run(Loop *L) {
614 if (L->getParentLoop())
615 return false;
616 SmallVector<Loop *, 8> LoopList;
617 populateWorklist(*L, LoopList);
618 return processLoopList(LoopList);
619 }
620
621 bool run(LoopNest &LN) {
622 SmallVector<Loop *, 8> LoopList(LN.getLoops());
623 for (unsigned I = 1; I < LoopList.size(); ++I)
624 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
625 return false;
626 return processLoopList(LoopList);
627 }
628
629 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
630 // TODO: Add a better heuristic to select the loop to be interchanged based
631 // on the dependence matrix. Currently we select the innermost loop.
632 return LoopList.size() - 1;
633 }
634
635 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
636 bool Changed = false;
637
638 // Ensure proper loop nest depth.
639 assert(hasSupportedLoopDepth(LoopList, *ORE) &&
640 "Unsupported depth of loop nest.");
641
642 unsigned LoopNestDepth = LoopList.size();
643
644 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
645 << "\n");
646
647 CharMatrix DependencyMatrix;
648 Loop *OuterMostLoop = *(LoopList.begin());
649 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
650 OuterMostLoop, DI, SE, ORE)) {
651 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
652 return false;
653 }
654
655 LLVM_DEBUG(dbgs() << "Dependency matrix before interchange:\n";
656 printDepMatrix(DependencyMatrix));
657
658 // Get the Outermost loop exit.
659 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
660 if (!LoopNestExit) {
661 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
662 return false;
663 }
664
665 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
666 CacheCostManager CCM(LoopList[0], AR, DI);
667 // We try to achieve the globally optimal memory access for the loopnest,
668 // and do interchange based on a bubble-sort fasion. We start from
669 // the innermost loop, move it outwards to the best possible position
670 // and repeat this process.
671 for (unsigned j = SelecLoopId; j > 0; j--) {
672 bool ChangedPerIter = false;
673 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
674 bool Interchanged =
675 processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
676 ChangedPerIter |= Interchanged;
677 Changed |= Interchanged;
678 }
679 // Early abort if there was no interchange during an entire round of
680 // moving loops outwards.
681 if (!ChangedPerIter)
682 break;
683 }
684 return Changed;
685 }
686
687 bool processLoop(SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
688 unsigned OuterLoopId,
689 std::vector<std::vector<char>> &DependencyMatrix,
690 CacheCostManager &CCM) {
691 Loop *OuterLoop = LoopList[OuterLoopId];
692 Loop *InnerLoop = LoopList[InnerLoopId];
693 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
694 << " and OuterLoopId = " << OuterLoopId << "\n");
695 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
696 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
697 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
698 return false;
699 }
700 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
701 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
702 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
703 DependencyMatrix, CCM)) {
704 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
705 return false;
706 }
707
708 ORE->emit([&]() {
709 return OptimizationRemark(DEBUG_TYPE, "Interchanged",
710 InnerLoop->getStartLoc(),
711 InnerLoop->getHeader())
712 << "Loop interchanged with enclosing loop.";
713 });
714
715 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
716 LIT.transform(LIL.getHasNoWrapReductions());
717 LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
718 LoopsInterchanged++;
719
720 llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE);
721
722 // Loops interchanged, update LoopList accordingly.
723 std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
724 // Update the DependencyMatrix
725 interChangeDependencies(DependencyMatrix, InnerLoopId, OuterLoopId);
726
727 LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
728 printDepMatrix(DependencyMatrix));
729
730 return true;
731 }
732};
733
734} // end anonymous namespace
735
736bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
737 return any_of(*BB, [](const Instruction &I) {
738 return I.mayHaveSideEffects() || I.mayReadFromMemory();
739 });
740}
741
742bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
743 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
744 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
745 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
746
747 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
748
749 // A perfectly nested loop will not have any branch in between the outer and
750 // inner block i.e. outer header will branch to either inner preheader and
751 // outerloop latch.
752 BranchInst *OuterLoopHeaderBI =
753 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
754 if (!OuterLoopHeaderBI)
755 return false;
756
757 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
758 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
759 Succ != OuterLoopLatch)
760 return false;
761
762 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
763 // We do not have any basic block in between now make sure the outer header
764 // and outer loop latch doesn't contain any unsafe instructions.
765 if (containsUnsafeInstructions(OuterLoopHeader) ||
766 containsUnsafeInstructions(OuterLoopLatch))
767 return false;
768
769 // Also make sure the inner loop preheader does not contain any unsafe
770 // instructions. Note that all instructions in the preheader will be moved to
771 // the outer loop header when interchanging.
772 if (InnerLoopPreHeader != OuterLoopHeader &&
773 containsUnsafeInstructions(InnerLoopPreHeader))
774 return false;
775
776 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
777 // Ensure the inner loop exit block flows to the outer loop latch possibly
778 // through empty blocks.
779 const BasicBlock &SuccInner =
780 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
781 if (&SuccInner != OuterLoopLatch) {
782 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
783 << " does not lead to the outer loop latch.\n";);
784 return false;
785 }
786 // The inner loop exit block does flow to the outer loop latch and not some
787 // other BBs, now make sure it contains safe instructions, since it will be
788 // moved into the (new) inner loop after interchange.
789 if (containsUnsafeInstructions(InnerLoopExit))
790 return false;
791
792 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
793 // We have a perfect loop nest.
794 return true;
795}
796
797bool LoopInterchangeLegality::isLoopStructureUnderstood() {
798 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
799 for (PHINode *InnerInduction : InnerLoopInductions) {
800 unsigned Num = InnerInduction->getNumOperands();
801 for (unsigned i = 0; i < Num; ++i) {
802 Value *Val = InnerInduction->getOperand(i);
803 if (isa<Constant>(Val))
804 continue;
806 if (!I)
807 return false;
808 // TODO: Handle triangular loops.
809 // e.g. for(int i=0;i<N;i++)
810 // for(int j=i;j<N;j++)
811 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
812 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
813 InnerLoopPreheader &&
814 !OuterLoop->isLoopInvariant(I)) {
815 return false;
816 }
817 }
818 }
819
820 // TODO: Handle triangular loops of another form.
821 // e.g. for(int i=0;i<N;i++)
822 // for(int j=0;j<i;j++)
823 // or,
824 // for(int i=0;i<N;i++)
825 // for(int j=0;j*i<N;j++)
826 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
827 BranchInst *InnerLoopLatchBI =
828 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
829 if (!InnerLoopLatchBI->isConditional())
830 return false;
831 if (CmpInst *InnerLoopCmp =
832 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
833 Value *Op0 = InnerLoopCmp->getOperand(0);
834 Value *Op1 = InnerLoopCmp->getOperand(1);
835
836 // LHS and RHS of the inner loop exit condition, e.g.,
837 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
838 Value *Left = nullptr;
839 Value *Right = nullptr;
840
841 // Check if V only involves inner loop induction variable.
842 // Return true if V is InnerInduction, or a cast from
843 // InnerInduction, or a binary operator that involves
844 // InnerInduction and a constant.
845 std::function<bool(Value *)> IsPathToInnerIndVar;
846 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
847 if (llvm::is_contained(InnerLoopInductions, V))
848 return true;
849 if (isa<Constant>(V))
850 return true;
852 if (!I)
853 return false;
854 if (isa<CastInst>(I))
855 return IsPathToInnerIndVar(I->getOperand(0));
857 return IsPathToInnerIndVar(I->getOperand(0)) &&
858 IsPathToInnerIndVar(I->getOperand(1));
859 return false;
860 };
861
862 // In case of multiple inner loop indvars, it is okay if LHS and RHS
863 // are both inner indvar related variables.
864 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
865 return true;
866
867 // Otherwise we check if the cmp instruction compares an inner indvar
868 // related variable (Left) with a outer loop invariant (Right).
869 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
870 Left = Op0;
871 Right = Op1;
872 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
873 Left = Op1;
874 Right = Op0;
875 }
876
877 if (Left == nullptr)
878 return false;
879
880 const SCEV *S = SE->getSCEV(Right);
881 if (!SE->isLoopInvariant(S, OuterLoop))
882 return false;
883 }
884
885 return true;
886}
887
888// If SV is a LCSSA PHI node with a single incoming value, return the incoming
889// value.
890static Value *followLCSSA(Value *SV) {
892 if (!PHI)
893 return SV;
894
895 if (PHI->getNumIncomingValues() != 1)
896 return SV;
897 return followLCSSA(PHI->getIncomingValue(0));
898}
899
900// Check V's users to see if it is involved in a reduction in L.
901static PHINode *
903 SmallVectorImpl<Instruction *> &HasNoWrapInsts) {
904 // Reduction variables cannot be constants.
905 if (isa<Constant>(V))
906 return nullptr;
907
908 for (Value *User : V->users()) {
910 if (PHI->getNumIncomingValues() == 1)
911 continue;
914 // Detect floating point reduction only when it can be reordered.
915 if (RD.getExactFPMathInst() != nullptr)
916 return nullptr;
917
919 switch (RK) {
920 case RecurKind::Or:
921 case RecurKind::And:
922 case RecurKind::Xor:
923 case RecurKind::SMin:
924 case RecurKind::SMax:
925 case RecurKind::UMin:
926 case RecurKind::UMax:
927 case RecurKind::FAdd:
928 case RecurKind::FMul:
929 case RecurKind::FMin:
930 case RecurKind::FMax:
936 case RecurKind::AnyOf:
937 return PHI;
938
939 // Change the order of integer addition/multiplication may change the
940 // semantics. Consider the following case:
941 //
942 // int A[2][2] = {{ INT_MAX, INT_MAX }, { INT_MIN, INT_MIN }};
943 // int sum = 0;
944 // for (int i = 0; i < 2; i++)
945 // for (int j = 0; j < 2; j++)
946 // sum += A[j][i];
947 //
948 // If the above loops are exchanged, the addition will cause an
949 // overflow. To prevent this, we must drop the nuw/nsw flags from the
950 // addition/multiplication instructions when we actually exchanges the
951 // loops.
952 case RecurKind::Add:
953 case RecurKind::Mul: {
954 unsigned OpCode = RecurrenceDescriptor::getOpcode(RK);
956
957 // Bail out when we fail to collect reduction instructions chain.
958 if (Ops.empty())
959 return nullptr;
960
961 for (Instruction *I : Ops) {
962 assert(I->getOpcode() == OpCode &&
963 "Expected the instruction to be the reduction operation");
964 (void)OpCode;
965
966 // If the instruction has nuw/nsw flags, we must drop them when the
967 // transformation is actually performed.
968 if (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())
969 HasNoWrapInsts.push_back(I);
970 }
971 return PHI;
972 }
973
974 default:
975 return nullptr;
976 }
977 }
978 return nullptr;
979 }
980 }
981
982 return nullptr;
983}
984
985bool LoopInterchangeLegality::findInductionAndReductions(
986 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
987 if (!L->getLoopLatch() || !L->getLoopPredecessor())
988 return false;
989 for (PHINode &PHI : L->getHeader()->phis()) {
990 InductionDescriptor ID;
992 Inductions.push_back(&PHI);
993 else {
994 // PHIs in inner loops need to be part of a reduction in the outer loop,
995 // discovered when checking the PHIs of the outer loop earlier.
996 if (!InnerLoop) {
997 if (!OuterInnerReductions.count(&PHI)) {
998 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
999 "across the outer loop.\n");
1000 return false;
1001 }
1002 } else {
1003 assert(PHI.getNumIncomingValues() == 2 &&
1004 "Phis in loop header should have exactly 2 incoming values");
1005 // Check if we have a PHI node in the outer loop that has a reduction
1006 // result from the inner loop as an incoming value.
1007 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
1008 PHINode *InnerRedPhi =
1009 findInnerReductionPhi(InnerLoop, V, HasNoWrapReductions);
1010 if (!InnerRedPhi ||
1011 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
1012 LLVM_DEBUG(
1013 dbgs()
1014 << "Failed to recognize PHI as an induction or reduction.\n");
1015 return false;
1016 }
1017 OuterInnerReductions.insert(&PHI);
1018 OuterInnerReductions.insert(InnerRedPhi);
1019 }
1020 }
1021 }
1022 return true;
1023}
1024
1025// This function indicates the current limitations in the transform as a result
1026// of which we do not proceed.
1027bool LoopInterchangeLegality::currentLimitations() {
1028 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1029
1030 // transform currently expects the loop latches to also be the exiting
1031 // blocks.
1032 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
1033 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
1034 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
1035 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
1036 LLVM_DEBUG(
1037 dbgs() << "Loops where the latch is not the exiting block are not"
1038 << " supported currently.\n");
1039 ORE->emit([&]() {
1040 return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
1041 OuterLoop->getStartLoc(),
1042 OuterLoop->getHeader())
1043 << "Loops where the latch is not the exiting block cannot be"
1044 " interchange currently.";
1045 });
1046 return true;
1047 }
1048
1049 SmallVector<PHINode *, 8> Inductions;
1050 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
1051 LLVM_DEBUG(
1052 dbgs() << "Only outer loops with induction or reduction PHI nodes "
1053 << "are supported currently.\n");
1054 ORE->emit([&]() {
1055 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
1056 OuterLoop->getStartLoc(),
1057 OuterLoop->getHeader())
1058 << "Only outer loops with induction or reduction PHI nodes can be"
1059 " interchanged currently.";
1060 });
1061 return true;
1062 }
1063
1064 Inductions.clear();
1065 // For multi-level loop nests, make sure that all phi nodes for inner loops
1066 // at all levels can be recognized as a induction or reduction phi. Bail out
1067 // if a phi node at a certain nesting level cannot be properly recognized.
1068 Loop *CurLevelLoop = OuterLoop;
1069 while (!CurLevelLoop->getSubLoops().empty()) {
1070 // We already made sure that the loop nest is tightly nested.
1071 CurLevelLoop = CurLevelLoop->getSubLoops().front();
1072 if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
1073 LLVM_DEBUG(
1074 dbgs() << "Only inner loops with induction or reduction PHI nodes "
1075 << "are supported currently.\n");
1076 ORE->emit([&]() {
1077 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
1078 CurLevelLoop->getStartLoc(),
1079 CurLevelLoop->getHeader())
1080 << "Only inner loops with induction or reduction PHI nodes can be"
1081 " interchange currently.";
1082 });
1083 return true;
1084 }
1085 }
1086
1087 // TODO: Triangular loops are not handled for now.
1088 if (!isLoopStructureUnderstood()) {
1089 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
1090 ORE->emit([&]() {
1091 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
1092 InnerLoop->getStartLoc(),
1093 InnerLoop->getHeader())
1094 << "Inner loop structure not understood currently.";
1095 });
1096 return true;
1097 }
1098
1099 return false;
1100}
1101
1102bool LoopInterchangeLegality::findInductions(
1103 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
1104 for (PHINode &PHI : L->getHeader()->phis()) {
1105 InductionDescriptor ID;
1107 Inductions.push_back(&PHI);
1108 }
1109 return !Inductions.empty();
1110}
1111
1112// We currently only support LCSSA PHI nodes in the inner loop exit, if their
1113// users are either reduction PHIs or PHIs outside the outer loop (which means
1114// the we are only interested in the final value after the loop).
1115static bool
1118 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
1119 for (PHINode &PHI : InnerExit->phis()) {
1120 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
1121 if (PHI.getNumIncomingValues() > 1)
1122 return false;
1123 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
1124 PHINode *PN = dyn_cast<PHINode>(U);
1125 return !PN ||
1126 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
1127 })) {
1128 return false;
1129 }
1130 }
1131 return true;
1132}
1133
1134// We currently support LCSSA PHI nodes in the outer loop exit, if their
1135// incoming values do not come from the outer loop latch or if the
1136// outer loop latch has a single predecessor. In that case, the value will
1137// be available if both the inner and outer loop conditions are true, which
1138// will still be true after interchanging. If we have multiple predecessor,
1139// that may not be the case, e.g. because the outer loop latch may be executed
1140// if the inner loop is not executed.
1141static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
1142 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
1143 for (PHINode &PHI : LoopNestExit->phis()) {
1144 for (Value *Incoming : PHI.incoming_values()) {
1146 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
1147 continue;
1148
1149 // The incoming value is defined in the outer loop latch. Currently we
1150 // only support that in case the outer loop latch has a single predecessor.
1151 // This guarantees that the outer loop latch is executed if and only if
1152 // the inner loop is executed (because tightlyNested() guarantees that the
1153 // outer loop header only branches to the inner loop or the outer loop
1154 // latch).
1155 // FIXME: We could weaken this logic and allow multiple predecessors,
1156 // if the values are produced outside the loop latch. We would need
1157 // additional logic to update the PHI nodes in the exit block as
1158 // well.
1159 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
1160 return false;
1161 }
1162 }
1163 return true;
1164}
1165
1166// In case of multi-level nested loops, it may occur that lcssa phis exist in
1167// the latch of InnerLoop, i.e., when defs of the incoming values are further
1168// inside the loopnest. Sometimes those incoming values are not available
1169// after interchange, since the original inner latch will become the new outer
1170// latch which may have predecessor paths that do not include those incoming
1171// values.
1172// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
1173// multi-level loop nests.
1174static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
1175 if (InnerLoop->getSubLoops().empty())
1176 return true;
1177 // If the original outer latch has only one predecessor, then values defined
1178 // further inside the looploop, e.g., in the innermost loop, will be available
1179 // at the new outer latch after interchange.
1180 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
1181 return true;
1182
1183 // The outer latch has more than one predecessors, i.e., the inner
1184 // exit and the inner header.
1185 // PHI nodes in the inner latch are lcssa phis where the incoming values
1186 // are defined further inside the loopnest. Check if those phis are used
1187 // in the original inner latch. If that is the case then bail out since
1188 // those incoming values may not be available at the new outer latch.
1189 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1190 for (PHINode &PHI : InnerLoopLatch->phis()) {
1191 for (auto *U : PHI.users()) {
1193 if (InnerLoopLatch == UI->getParent())
1194 return false;
1195 }
1196 }
1197 return true;
1198}
1199
1200bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
1201 unsigned OuterLoopId,
1202 CharMatrix &DepMatrix) {
1203 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
1204 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
1205 << " and OuterLoopId = " << OuterLoopId
1206 << " due to dependence\n");
1207 ORE->emit([&]() {
1208 return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
1209 InnerLoop->getStartLoc(),
1210 InnerLoop->getHeader())
1211 << "Cannot interchange loops due to dependences.";
1212 });
1213 return false;
1214 }
1215 // Check if outer and inner loop contain legal instructions only.
1216 for (auto *BB : OuterLoop->blocks())
1217 for (Instruction &I : BB->instructionsWithoutDebug())
1218 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1219 // readnone functions do not prevent interchanging.
1220 if (CI->onlyWritesMemory())
1221 continue;
1222 LLVM_DEBUG(
1223 dbgs() << "Loops with call instructions cannot be interchanged "
1224 << "safely.");
1225 ORE->emit([&]() {
1226 return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
1227 CI->getDebugLoc(),
1228 CI->getParent())
1229 << "Cannot interchange loops due to call instruction.";
1230 });
1231
1232 return false;
1233 }
1234
1235 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1236 LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n");
1237 return false;
1238 }
1239
1240 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
1241 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
1242 ORE->emit([&]() {
1243 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
1244 InnerLoop->getStartLoc(),
1245 InnerLoop->getHeader())
1246 << "Cannot interchange loops because unsupported PHI nodes found "
1247 "in inner loop latch.";
1248 });
1249 return false;
1250 }
1251
1252 // TODO: The loops could not be interchanged due to current limitations in the
1253 // transform module.
1254 if (currentLimitations()) {
1255 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1256 return false;
1257 }
1258
1259 // Check if the loops are tightly nested.
1260 if (!tightlyNested(OuterLoop, InnerLoop)) {
1261 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1262 ORE->emit([&]() {
1263 return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1264 InnerLoop->getStartLoc(),
1265 InnerLoop->getHeader())
1266 << "Cannot interchange loops because they are not tightly "
1267 "nested.";
1268 });
1269 return false;
1270 }
1271
1272 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1273 OuterInnerReductions)) {
1274 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1275 ORE->emit([&]() {
1276 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1277 InnerLoop->getStartLoc(),
1278 InnerLoop->getHeader())
1279 << "Found unsupported PHI node in loop exit.";
1280 });
1281 return false;
1282 }
1283
1284 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1285 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1286 ORE->emit([&]() {
1287 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1288 OuterLoop->getStartLoc(),
1289 OuterLoop->getHeader())
1290 << "Found unsupported PHI node in loop exit.";
1291 });
1292 return false;
1293 }
1294
1295 return true;
1296}
1297
1298void CacheCostManager::computeIfUnitinialized() {
1299 if (CC.has_value())
1300 return;
1301
1302 LLVM_DEBUG(dbgs() << "Compute CacheCost.\n");
1303 CC = CacheCost::getCacheCost(*OutermostLoop, *AR, *DI);
1304 // Obtain the loop vector returned from loop cache analysis beforehand,
1305 // and put each <Loop, index> pair into a map for constant time query
1306 // later. Indices in loop vector reprsent the optimal order of the
1307 // corresponding loop, e.g., given a loopnest with depth N, index 0
1308 // indicates the loop should be placed as the outermost loop and index N
1309 // indicates the loop should be placed as the innermost loop.
1310 //
1311 // For the old pass manager CacheCost would be null.
1312 if (*CC != nullptr)
1313 for (const auto &[Idx, Cost] : enumerate((*CC)->getLoopCosts()))
1314 CostMap[Cost.first] = Idx;
1315}
1316
1317CacheCost *CacheCostManager::getCacheCost() {
1318 computeIfUnitinialized();
1319 return CC->get();
1320}
1321
1322const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
1323 computeIfUnitinialized();
1324 return CostMap;
1325}
1326
1327int LoopInterchangeProfitability::getInstrOrderCost() {
1328 unsigned GoodOrder, BadOrder;
1329 BadOrder = GoodOrder = 0;
1330 for (BasicBlock *BB : InnerLoop->blocks()) {
1331 for (Instruction &Ins : *BB) {
1332 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1333 bool FoundInnerInduction = false;
1334 bool FoundOuterInduction = false;
1335 for (Value *Op : GEP->operands()) {
1336 // Skip operands that are not SCEV-able.
1337 if (!SE->isSCEVable(Op->getType()))
1338 continue;
1339
1340 const SCEV *OperandVal = SE->getSCEV(Op);
1341 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1342 if (!AR)
1343 continue;
1344
1345 // If we find the inner induction after an outer induction e.g.
1346 // for(int i=0;i<N;i++)
1347 // for(int j=0;j<N;j++)
1348 // A[i][j] = A[i-1][j-1]+k;
1349 // then it is a good order.
1350 if (AR->getLoop() == InnerLoop) {
1351 // We found an InnerLoop induction after OuterLoop induction. It is
1352 // a good order.
1353 FoundInnerInduction = true;
1354 if (FoundOuterInduction) {
1355 GoodOrder++;
1356 break;
1357 }
1358 }
1359 // If we find the outer induction after an inner induction e.g.
1360 // for(int i=0;i<N;i++)
1361 // for(int j=0;j<N;j++)
1362 // A[j][i] = A[j-1][i-1]+k;
1363 // then it is a bad order.
1364 if (AR->getLoop() == OuterLoop) {
1365 // We found an OuterLoop induction after InnerLoop induction. It is
1366 // a bad order.
1367 FoundOuterInduction = true;
1368 if (FoundInnerInduction) {
1369 BadOrder++;
1370 break;
1371 }
1372 }
1373 }
1374 }
1375 }
1376 }
1377 return GoodOrder - BadOrder;
1378}
1379
1380std::optional<bool>
1381LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1382 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
1383 // This is the new cost model returned from loop cache analysis.
1384 // A smaller index means the loop should be placed an outer loop, and vice
1385 // versa.
1386 auto InnerLoopIt = CostMap.find(InnerLoop);
1387 if (InnerLoopIt == CostMap.end())
1388 return std::nullopt;
1389 auto OuterLoopIt = CostMap.find(OuterLoop);
1390 if (OuterLoopIt == CostMap.end())
1391 return std::nullopt;
1392
1393 if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop))
1394 return std::nullopt;
1395 unsigned InnerIndex = InnerLoopIt->second;
1396 unsigned OuterIndex = OuterLoopIt->second;
1397 LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
1398 << ", OuterIndex = " << OuterIndex << "\n");
1399 assert(InnerIndex != OuterIndex && "CostMap should assign unique "
1400 "numbers to each loop");
1401 return std::optional<bool>(InnerIndex < OuterIndex);
1402}
1403
1404std::optional<bool>
1405LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1406 // Legacy cost model: this is rough cost estimation algorithm. It counts the
1407 // good and bad order of induction variables in the instruction and allows
1408 // reordering if number of bad orders is more than good.
1409 int Cost = getInstrOrderCost();
1410 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1412 return std::optional<bool>(true);
1413
1414 return std::nullopt;
1415}
1416
1417/// Return true if we can vectorize the loop specified by \p LoopId.
1418static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) {
1419 for (const auto &Dep : DepMatrix) {
1420 char Dir = Dep[LoopId];
1421 char DepType = Dep.back();
1422 assert((DepType == '<' || DepType == '*') &&
1423 "Unexpected element in dependency vector");
1424
1425 // There are no loop-carried dependencies.
1426 if (Dir == '=' || Dir == 'I')
1427 continue;
1428
1429 // DepType being '<' means that this direction vector represents a forward
1430 // dependency. In principle, a loop with '<' direction can be vectorized in
1431 // this case.
1432 if (Dir == '<' && DepType == '<')
1433 continue;
1434
1435 // We cannot prove that the loop is vectorizable.
1436 return false;
1437 }
1438 return true;
1439}
1440
1441std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1442 unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1443 // If the outer loop cannot be vectorized, it is not profitable to move this
1444 // to inner position.
1445 if (!canVectorize(DepMatrix, OuterLoopId))
1446 return false;
1447
1448 // If the inner loop cannot be vectorized but the outer loop can be, then it
1449 // is profitable to interchange to enable inner loop parallelism.
1450 if (!canVectorize(DepMatrix, InnerLoopId))
1451 return true;
1452
1453 // If both the inner and the outer loop can be vectorized, it is necessary to
1454 // check the cost of each vectorized loop for profitability decision. At this
1455 // time we do not have a cost model to estimate them, so return nullopt.
1456 // TODO: Estimate the cost of vectorized loop when both the outer and the
1457 // inner loop can be vectorized.
1458 return std::nullopt;
1459}
1460
1461bool LoopInterchangeProfitability::isProfitable(
1462 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1463 unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1464
1465 // Return true if interchange is forced and the cost-model ignored.
1466 if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
1467 return true;
1469 "Duplicate rules and option 'ignore' are not allowed");
1470
1471 // isProfitable() is structured to avoid endless loop interchange. If the
1472 // highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
1473 // decide the profitability then, profitability check will stop and return the
1474 // analysis result. If it failed to determine it (e.g., cache analysis failed
1475 // to analyze the loopnest due to delinearization issues) then go ahead the
1476 // second highest priority rule (isProfitablePerInstrOrderCost by default).
1477 // Likewise, if it failed to analysis the profitability then only, the last
1478 // rule (isProfitableForVectorization by default) will decide.
1479 std::optional<bool> shouldInterchange;
1480 for (RuleTy RT : Profitabilities) {
1481 switch (RT) {
1482 case RuleTy::PerLoopCacheAnalysis: {
1483 CacheCost *CC = CCM.getCacheCost();
1484 const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
1485 shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
1486 break;
1487 }
1488 case RuleTy::PerInstrOrderCost:
1489 shouldInterchange = isProfitablePerInstrOrderCost();
1490 break;
1491 case RuleTy::ForVectorization:
1492 shouldInterchange =
1493 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1494 break;
1495 case RuleTy::Ignore:
1496 llvm_unreachable("Option 'ignore' is not supported with other options");
1497 break;
1498 }
1499
1500 // If this rule could determine the profitability, don't call subsequent
1501 // rules.
1502 if (shouldInterchange.has_value())
1503 break;
1504 }
1505
1506 if (!shouldInterchange.has_value()) {
1507 ORE->emit([&]() {
1508 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1509 InnerLoop->getStartLoc(),
1510 InnerLoop->getHeader())
1511 << "Insufficient information to calculate the cost of loop for "
1512 "interchange.";
1513 });
1514 return false;
1515 } else if (!shouldInterchange.value()) {
1516 ORE->emit([&]() {
1517 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1518 InnerLoop->getStartLoc(),
1519 InnerLoop->getHeader())
1520 << "Interchanging loops is not considered to improve cache "
1521 "locality nor vectorization.";
1522 });
1523 return false;
1524 }
1525 return true;
1526}
1527
1528void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1529 Loop *InnerLoop) {
1530 for (Loop *L : *OuterLoop)
1531 if (L == InnerLoop) {
1532 OuterLoop->removeChildLoop(L);
1533 return;
1534 }
1535 llvm_unreachable("Couldn't find loop");
1536}
1537
1538/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1539/// new inner and outer loop after interchanging: NewInner is the original
1540/// outer loop and NewOuter is the original inner loop.
1541///
1542/// Before interchanging, we have the following structure
1543/// Outer preheader
1544// Outer header
1545// Inner preheader
1546// Inner header
1547// Inner body
1548// Inner latch
1549// outer bbs
1550// Outer latch
1551//
1552// After interchanging:
1553// Inner preheader
1554// Inner header
1555// Outer preheader
1556// Outer header
1557// Inner body
1558// outer bbs
1559// Outer latch
1560// Inner latch
1561void LoopInterchangeTransform::restructureLoops(
1562 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1563 BasicBlock *OrigOuterPreHeader) {
1564 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1565 // The original inner loop preheader moves from the new inner loop to
1566 // the parent loop, if there is one.
1567 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1568 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1569
1570 // Switch the loop levels.
1571 if (OuterLoopParent) {
1572 // Remove the loop from its parent loop.
1573 removeChildLoop(OuterLoopParent, NewInner);
1574 removeChildLoop(NewInner, NewOuter);
1575 OuterLoopParent->addChildLoop(NewOuter);
1576 } else {
1577 removeChildLoop(NewInner, NewOuter);
1578 LI->changeTopLevelLoop(NewInner, NewOuter);
1579 }
1580 while (!NewOuter->isInnermost())
1581 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1582 NewOuter->addChildLoop(NewInner);
1583
1584 // BBs from the original inner loop.
1585 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1586
1587 // Add BBs from the original outer loop to the original inner loop (excluding
1588 // BBs already in inner loop)
1589 for (BasicBlock *BB : NewInner->blocks())
1590 if (LI->getLoopFor(BB) == NewInner)
1591 NewOuter->addBlockEntry(BB);
1592
1593 // Now remove inner loop header and latch from the new inner loop and move
1594 // other BBs (the loop body) to the new inner loop.
1595 BasicBlock *OuterHeader = NewOuter->getHeader();
1596 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1597 for (BasicBlock *BB : OrigInnerBBs) {
1598 // Nothing will change for BBs in child loops.
1599 if (LI->getLoopFor(BB) != NewOuter)
1600 continue;
1601 // Remove the new outer loop header and latch from the new inner loop.
1602 if (BB == OuterHeader || BB == OuterLatch)
1603 NewInner->removeBlockFromLoop(BB);
1604 else
1605 LI->changeLoopFor(BB, NewInner);
1606 }
1607
1608 // The preheader of the original outer loop becomes part of the new
1609 // outer loop.
1610 NewOuter->addBlockEntry(OrigOuterPreHeader);
1611 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1612
1613 // Tell SE that we move the loops around.
1614 SE->forgetLoop(NewOuter);
1615}
1616
1617bool LoopInterchangeTransform::transform(
1618 ArrayRef<Instruction *> DropNoWrapInsts) {
1619 bool Transformed = false;
1620
1621 if (InnerLoop->getSubLoops().empty()) {
1622 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1623 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1624 auto &InductionPHIs = LIL.getInnerLoopInductions();
1625 if (InductionPHIs.empty()) {
1626 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1627 return false;
1628 }
1629
1630 SmallVector<Instruction *, 8> InnerIndexVarList;
1631 for (PHINode *CurInductionPHI : InductionPHIs) {
1632 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1633 InnerIndexVarList.push_back(
1634 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1635 else
1636 InnerIndexVarList.push_back(
1637 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1638 }
1639
1640 // Create a new latch block for the inner loop. We split at the
1641 // current latch's terminator and then move the condition and all
1642 // operands that are not either loop-invariant or the induction PHI into the
1643 // new latch block.
1644 BasicBlock *NewLatch =
1645 SplitBlock(InnerLoop->getLoopLatch(),
1646 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1647
1648 SmallSetVector<Instruction *, 4> WorkList;
1649 unsigned i = 0;
1650 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1651 for (; i < WorkList.size(); i++) {
1652 // Duplicate instruction and move it the new latch. Update uses that
1653 // have been moved.
1654 Instruction *NewI = WorkList[i]->clone();
1655 NewI->insertBefore(NewLatch->getFirstNonPHIIt());
1656 assert(!NewI->mayHaveSideEffects() &&
1657 "Moving instructions with side-effects may change behavior of "
1658 "the loop nest!");
1659 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1660 Instruction *UserI = cast<Instruction>(U.getUser());
1661 if (!InnerLoop->contains(UserI->getParent()) ||
1662 UserI->getParent() == NewLatch ||
1663 llvm::is_contained(InductionPHIs, UserI))
1664 U.set(NewI);
1665 }
1666 // Add operands of moved instruction to the worklist, except if they are
1667 // outside the inner loop or are the induction PHI.
1668 for (Value *Op : WorkList[i]->operands()) {
1670 if (!OpI ||
1671 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1672 llvm::is_contained(InductionPHIs, OpI))
1673 continue;
1674 WorkList.insert(OpI);
1675 }
1676 }
1677 };
1678
1679 // FIXME: Should we interchange when we have a constant condition?
1682 ->getCondition());
1683 if (CondI)
1684 WorkList.insert(CondI);
1685 MoveInstructions();
1686 for (Instruction *InnerIndexVar : InnerIndexVarList)
1687 WorkList.insert(cast<Instruction>(InnerIndexVar));
1688 MoveInstructions();
1689 }
1690
1691 // Ensure the inner loop phi nodes have a separate basic block.
1692 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1693 if (&*InnerLoopHeader->getFirstNonPHIIt() !=
1694 InnerLoopHeader->getTerminator()) {
1695 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHIIt(), DT, LI);
1696 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1697 }
1698
1699 // Instructions in the original inner loop preheader may depend on values
1700 // defined in the outer loop header. Move them there, because the original
1701 // inner loop preheader will become the entry into the interchanged loop nest.
1702 // Currently we move all instructions and rely on LICM to move invariant
1703 // instructions outside the loop nest.
1704 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1705 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1706 if (InnerLoopPreHeader != OuterLoopHeader) {
1707 for (Instruction &I :
1708 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1709 std::prev(InnerLoopPreHeader->end()))))
1710 I.moveBeforePreserving(OuterLoopHeader->getTerminator()->getIterator());
1711 }
1712
1713 Transformed |= adjustLoopLinks();
1714 if (!Transformed) {
1715 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1716 return false;
1717 }
1718
1719 // Finally, drop the nsw/nuw flags from the instructions for reduction
1720 // calculations.
1721 for (Instruction *Reduction : DropNoWrapInsts) {
1722 Reduction->setHasNoSignedWrap(false);
1723 Reduction->setHasNoUnsignedWrap(false);
1724 }
1725
1726 return true;
1727}
1728
1729/// \brief Move all instructions except the terminator from FromBB right before
1730/// InsertBefore
1731static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1732 BasicBlock *ToBB = InsertBefore->getParent();
1733
1734 ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(),
1735 FromBB->getTerminator()->getIterator());
1736}
1737
1738/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1739static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1740 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1741 // from BB1 afterwards.
1742 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1743 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1744 for (Instruction *I : TempInstrs)
1745 I->removeFromParent();
1746
1747 // Move instructions from BB2 to BB1.
1748 moveBBContents(BB2, BB1->getTerminator());
1749
1750 // Move instructions from TempInstrs to BB2.
1751 for (Instruction *I : TempInstrs)
1752 I->insertBefore(BB2->getTerminator()->getIterator());
1753}
1754
1755// Update BI to jump to NewBB instead of OldBB. Records updates to the
1756// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1757// \p OldBB is exactly once in BI's successor list.
1758static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1759 BasicBlock *NewBB,
1760 std::vector<DominatorTree::UpdateType> &DTUpdates,
1761 bool MustUpdateOnce = true) {
1762 assert((!MustUpdateOnce || llvm::count(successors(BI), OldBB) == 1) &&
1763 "BI must jump to OldBB exactly once.");
1764 bool Changed = false;
1765 for (Use &Op : BI->operands())
1766 if (Op == OldBB) {
1767 Op.set(NewBB);
1768 Changed = true;
1769 }
1770
1771 if (Changed) {
1772 DTUpdates.push_back(
1773 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1774 DTUpdates.push_back(
1775 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1776 }
1777 assert(Changed && "Expected a successor to be updated");
1778}
1779
1780// Move Lcssa PHIs to the right place.
1781static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1782 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1783 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1784 Loop *InnerLoop, LoopInfo *LI) {
1785
1786 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1787 // defined either in the header or latch. Those blocks will become header and
1788 // latch of the new outer loop, and the only possible users can PHI nodes
1789 // in the exit block of the loop nest or the outer loop header (reduction
1790 // PHIs, in that case, the incoming value must be defined in the inner loop
1791 // header). We can just substitute the user with the incoming value and remove
1792 // the PHI.
1793 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1794 assert(P.getNumIncomingValues() == 1 &&
1795 "Only loops with a single exit are supported!");
1796
1797 // Incoming values are guaranteed be instructions currently.
1798 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1799 // In case of multi-level nested loops, follow LCSSA to find the incoming
1800 // value defined from the innermost loop.
1801 auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1802 // Skip phis with incoming values from the inner loop body, excluding the
1803 // header and latch.
1804 if (IncIInnerMost->getParent() != InnerLatch &&
1805 IncIInnerMost->getParent() != InnerHeader)
1806 continue;
1807
1808 assert(all_of(P.users(),
1809 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1810 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1811 IncI->getParent() == InnerHeader) ||
1812 cast<PHINode>(U)->getParent() == OuterExit;
1813 }) &&
1814 "Can only replace phis iff the uses are in the loop nest exit or "
1815 "the incoming value is defined in the inner header (it will "
1816 "dominate all loop blocks after interchanging)");
1817 P.replaceAllUsesWith(IncI);
1818 P.eraseFromParent();
1819 }
1820
1821 SmallVector<PHINode *, 8> LcssaInnerExit(
1822 llvm::make_pointer_range(InnerExit->phis()));
1823
1824 SmallVector<PHINode *, 8> LcssaInnerLatch(
1825 llvm::make_pointer_range(InnerLatch->phis()));
1826
1827 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1828 // If a PHI node has users outside of InnerExit, it has a use outside the
1829 // interchanged loop and we have to preserve it. We move these to
1830 // InnerLatch, which will become the new exit block for the innermost
1831 // loop after interchanging.
1832 for (PHINode *P : LcssaInnerExit)
1833 P->moveBefore(InnerLatch->getFirstNonPHIIt());
1834
1835 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1836 // and we have to move them to the new inner latch.
1837 for (PHINode *P : LcssaInnerLatch)
1838 P->moveBefore(InnerExit->getFirstNonPHIIt());
1839
1840 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1841 // incoming values defined in the outer loop, we have to add a new PHI
1842 // in the inner loop latch, which became the exit block of the outer loop,
1843 // after interchanging.
1844 if (OuterExit) {
1845 for (PHINode &P : OuterExit->phis()) {
1846 if (P.getNumIncomingValues() != 1)
1847 continue;
1848 // Skip Phis with incoming values defined in the inner loop. Those should
1849 // already have been updated.
1850 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1851 if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1852 continue;
1853
1854 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1855 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1856 NewPhi->setIncomingBlock(0, OuterLatch);
1857 // We might have incoming edges from other BBs, i.e., the original outer
1858 // header.
1859 for (auto *Pred : predecessors(InnerLatch)) {
1860 if (Pred == OuterLatch)
1861 continue;
1862 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1863 }
1864 NewPhi->insertBefore(InnerLatch->getFirstNonPHIIt());
1865 P.setIncomingValue(0, NewPhi);
1866 }
1867 }
1868
1869 // Now adjust the incoming blocks for the LCSSA PHIs.
1870 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1871 // with the new latch.
1872 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1873}
1874
1875bool LoopInterchangeTransform::adjustLoopBranches() {
1876 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1877 std::vector<DominatorTree::UpdateType> DTUpdates;
1878
1879 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1880 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1881
1882 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1883 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1884 InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1885 // Ensure that both preheaders do not contain PHI nodes and have single
1886 // predecessors. This allows us to move them easily. We use
1887 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1888 // preheaders do not satisfy those conditions.
1889 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1890 !OuterLoopPreHeader->getUniquePredecessor())
1891 OuterLoopPreHeader =
1892 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1893 if (InnerLoopPreHeader == OuterLoop->getHeader())
1894 InnerLoopPreHeader =
1895 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1896
1897 // Adjust the loop preheader
1898 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1899 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1900 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1901 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1902 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1903 BasicBlock *InnerLoopLatchPredecessor =
1904 InnerLoopLatch->getUniquePredecessor();
1905 BasicBlock *InnerLoopLatchSuccessor;
1906 BasicBlock *OuterLoopLatchSuccessor;
1907
1908 BranchInst *OuterLoopLatchBI =
1909 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1910 BranchInst *InnerLoopLatchBI =
1911 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1912 BranchInst *OuterLoopHeaderBI =
1913 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1914 BranchInst *InnerLoopHeaderBI =
1915 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1916
1917 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1918 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1919 !InnerLoopHeaderBI)
1920 return false;
1921
1922 BranchInst *InnerLoopLatchPredecessorBI =
1923 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1924 BranchInst *OuterLoopPredecessorBI =
1925 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1926
1927 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1928 return false;
1929 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1930 if (!InnerLoopHeaderSuccessor)
1931 return false;
1932
1933 // Adjust Loop Preheader and headers.
1934 // The branches in the outer loop predecessor and the outer loop header can
1935 // be unconditional branches or conditional branches with duplicates. Consider
1936 // this when updating the successors.
1937 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1938 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1939 // The outer loop header might or might not branch to the outer latch.
1940 // We are guaranteed to branch to the inner loop preheader.
1941 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1942 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1943 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1944 DTUpdates,
1945 /*MustUpdateOnce=*/false);
1946 }
1947 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1948 InnerLoopHeaderSuccessor, DTUpdates,
1949 /*MustUpdateOnce=*/false);
1950
1951 // Adjust reduction PHI's now that the incoming block has changed.
1952 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1953 OuterLoopHeader);
1954
1955 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1956 OuterLoopPreHeader, DTUpdates);
1957
1958 // -------------Adjust loop latches-----------
1959 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1960 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1961 else
1962 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1963
1964 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1965 InnerLoopLatchSuccessor, DTUpdates);
1966
1967 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1968 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1969 else
1970 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1971
1972 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1973 OuterLoopLatchSuccessor, DTUpdates);
1974 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1975 DTUpdates);
1976
1977 DT->applyUpdates(DTUpdates);
1978 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1979 OuterLoopPreHeader);
1980
1981 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1982 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1983 InnerLoop, LI);
1984 // For PHIs in the exit block of the outer loop, outer's latch has been
1985 // replaced by Inners'.
1986 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1987
1988 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1989 // Now update the reduction PHIs in the inner and outer loop headers.
1990 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1991 for (PHINode &PHI : InnerLoopHeader->phis())
1992 if (OuterInnerReductions.contains(&PHI))
1993 InnerLoopPHIs.push_back(&PHI);
1994
1995 for (PHINode &PHI : OuterLoopHeader->phis())
1996 if (OuterInnerReductions.contains(&PHI))
1997 OuterLoopPHIs.push_back(&PHI);
1998
1999 // Now move the remaining reduction PHIs from outer to inner loop header and
2000 // vice versa. The PHI nodes must be part of a reduction across the inner and
2001 // outer loop and all the remains to do is and updating the incoming blocks.
2002 for (PHINode *PHI : OuterLoopPHIs) {
2003 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
2004 PHI->moveBefore(InnerLoopHeader->getFirstNonPHIIt());
2005 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
2006 }
2007 for (PHINode *PHI : InnerLoopPHIs) {
2008 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
2009 PHI->moveBefore(OuterLoopHeader->getFirstNonPHIIt());
2010 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
2011 }
2012
2013 // Update the incoming blocks for moved PHI nodes.
2014 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
2015 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
2016 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
2017 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
2018
2019 // Values defined in the outer loop header could be used in the inner loop
2020 // latch. In that case, we need to create LCSSA phis for them, because after
2021 // interchanging they will be defined in the new inner loop and used in the
2022 // new outer loop.
2023 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
2024 for (Instruction &I :
2025 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
2026 MayNeedLCSSAPhis.push_back(&I);
2027 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE);
2028
2029 return true;
2030}
2031
2032bool LoopInterchangeTransform::adjustLoopLinks() {
2033 // Adjust all branches in the inner and outer loop.
2034 bool Changed = adjustLoopBranches();
2035 if (Changed) {
2036 // We have interchanged the preheaders so we need to interchange the data in
2037 // the preheaders as well. This is because the content of the inner
2038 // preheader was previously executed inside the outer loop.
2039 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2040 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
2041 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
2042 }
2043 return Changed;
2044}
2045
2049 LPMUpdater &U) {
2050 Function &F = *LN.getParent();
2051 SmallVector<Loop *, 8> LoopList(LN.getLoops());
2052
2053 if (MaxMemInstrCount < 1) {
2054 LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");
2055 return PreservedAnalyses::all();
2056 }
2058
2059 // Ensure minimum depth of the loop nest to do the interchange.
2060 if (!hasSupportedLoopDepth(LoopList, ORE))
2061 return PreservedAnalyses::all();
2062 // Ensure computable loop nest.
2063 if (!isComputableLoopNest(&AR.SE, LoopList)) {
2064 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
2065 return PreservedAnalyses::all();
2066 }
2067
2068 ORE.emit([&]() {
2069 return OptimizationRemarkAnalysis(DEBUG_TYPE, "Dependence",
2072 << "Computed dependence info, invoking the transform.";
2073 });
2074
2075 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
2076 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE).run(LN))
2077 return PreservedAnalyses::all();
2078 U.markLoopNestChanged(true);
2080}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
Rewrite undef for PHI
ReachingDefInfo InstSet InstSet & Ignore
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define DEBUG_TYPE
Hexagon Common GEP
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the interface for the loop cache analysis.
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
static cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time"))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts)
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static void printDepMatrix(CharMatrix &DepMatrix)
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)
static bool inThisOrder(const Instruction *Src, const Instruction *Dst)
Return true if Src appears before Dst in the same basic block.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)
Return true if we can vectorize the loop specified by LoopId.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
#define DEBUG_TYPE
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)
static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::ZeroOrMore, cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))
static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
loop Loop Strength Reduction
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
uint64_t IntrinsicInst * II
#define P(N)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:191
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
iterator end()
Definition DenseMap.h:81
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
iterator begin() const
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:632
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:61
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
RecurKind getRecurrenceKind() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition StringMap.h:133
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:292
LLVM Value Representation.
Definition Value.h:75
iterator_range< user_iterator > users()
Definition Value.h:426
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
list_initializer< Ty > list_init(ArrayRef< Ty > Vals)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
InstructionCost Cost
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2452
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
auto successors(const MachineBasicBlock *BB)
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition LCSSA.cpp:449
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:366
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1948
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:325
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ FMul
Product of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Add
Sum of integers.
@ FAdd
Sum of floats.
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1934
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition LCSSA.cpp:308
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...