LLVM 22.0.0git
LoopInterchange.cpp
Go to the documentation of this file.
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallSet.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/ADT/StringMap.h"
21#include "llvm/ADT/StringRef.h"
30#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Dominators.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
37#include "llvm/IR/User.h"
38#include "llvm/IR/Value.h"
41#include "llvm/Support/Debug.h"
48#include <cassert>
49#include <utility>
50#include <vector>
51
52using namespace llvm;
53
54#define DEBUG_TYPE "loop-interchange"
55
56STATISTIC(LoopsInterchanged, "Number of loops interchanged");
57
59 "loop-interchange-threshold", cl::init(0), cl::Hidden,
60 cl::desc("Interchange if you gain more than this number"));
61
62// Maximum number of load-stores that can be handled in the dependency matrix.
64 "loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden,
66 "Maximum number of load-store instructions that should be handled "
67 "in the dependency matrix. Higher value may lead to more interchanges "
68 "at the cost of compile-time"));
69
70namespace {
71
73
74/// A list of direction vectors. Each entry represents a direction vector
75/// corresponding to one or more dependencies existing in the loop nest. The
76/// length of all direction vectors is equal and is N + 1, where N is the depth
77/// of the loop nest. The first N elements correspond to the dependency
78/// direction of each N loops. The last one indicates whether this entry is
79/// forward dependency ('<') or not ('*'). The term "forward" aligns with what
80/// is defined in LoopAccessAnalysis.
81// TODO: Check if we can use a sparse matrix here.
82using CharMatrix = std::vector<std::vector<char>>;
83
84/// Types of rules used in profitability check.
85enum class RuleTy {
86 PerLoopCacheAnalysis,
87 PerInstrOrderCost,
88 ForVectorization,
89 Ignore
90};
91
92} // end anonymous namespace
93
94// Minimum loop depth supported.
96 "loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden,
97 cl::desc("Minimum depth of loop nest considered for the transform"));
98
99// Maximum loop depth supported.
101 "loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
102 cl::desc("Maximum depth of loop nest considered for the transform"));
103
104// We prefer cache cost to vectorization by default.
106 "loop-interchange-profitabilities", cl::ZeroOrMore,
108 cl::desc("List of profitability heuristics to be used. They are applied in "
109 "the given order"),
110 cl::list_init<RuleTy>({RuleTy::PerLoopCacheAnalysis,
111 RuleTy::PerInstrOrderCost,
112 RuleTy::ForVectorization}),
113 cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache",
114 "Prioritize loop cache cost"),
115 clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
116 "Prioritize the IVs order of each instruction"),
117 clEnumValN(RuleTy::ForVectorization, "vectorize",
118 "Prioritize vectorization"),
119 clEnumValN(RuleTy::Ignore, "ignore",
120 "Ignore profitability, force interchange (does not "
121 "work with other options)")));
122
123#ifndef NDEBUG
126 for (RuleTy Rule : Rules) {
127 if (!Set.insert(Rule).second)
128 return false;
129 if (Rule == RuleTy::Ignore)
130 return false;
131 }
132 return true;
133}
134
135static void printDepMatrix(CharMatrix &DepMatrix) {
136 for (auto &Row : DepMatrix) {
137 // Drop the last element because it is a flag indicating whether this is
138 // forward dependency or not, which doesn't affect the legality check.
139 for (char D : drop_end(Row))
140 LLVM_DEBUG(dbgs() << D << " ");
141 LLVM_DEBUG(dbgs() << "\n");
142 }
143}
144
145/// Return true if \p Src appears before \p Dst in the same basic block.
146/// Precondition: \p Src and \Dst are distinct instructions within the same
147/// basic block.
148static bool inThisOrder(const Instruction *Src, const Instruction *Dst) {
149 assert(Src->getParent() == Dst->getParent() && Src != Dst &&
150 "Expected Src and Dst to be different instructions in the same BB");
151
152 bool FoundSrc = false;
153 for (const Instruction &I : *(Src->getParent())) {
154 if (&I == Src) {
155 FoundSrc = true;
156 continue;
157 }
158 if (&I == Dst)
159 return FoundSrc;
160 }
161
162 llvm_unreachable("Dst not found");
163}
164#endif
165
166static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
167 Loop *L, DependenceInfo *DI,
168 ScalarEvolution *SE,
171
172 ValueVector MemInstr;
173
174 // For each block.
175 for (BasicBlock *BB : L->blocks()) {
176 // Scan the BB and collect legal loads and stores.
177 for (Instruction &I : *BB) {
178 if (!isa<Instruction>(I))
179 return false;
180 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
181 if (!Ld->isSimple())
182 return false;
183 MemInstr.push_back(&I);
184 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
185 if (!St->isSimple())
186 return false;
187 MemInstr.push_back(&I);
188 }
189 }
190 }
191
192 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
193 << " Loads and Stores to analyze\n");
194 if (MemInstr.size() > MaxMemInstrCount) {
195 LLVM_DEBUG(dbgs() << "The transform doesn't support more than "
196 << MaxMemInstrCount << " load/stores in a loop\n");
197 ORE->emit([&]() {
198 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoop",
199 L->getStartLoc(), L->getHeader())
200 << "Number of loads/stores exceeded, the supported maximum "
201 "can be increased with option "
202 "-loop-interchange-maxmeminstr-count.";
203 });
204 return false;
205 }
206 ValueVector::iterator I, IE, J, JE;
207
208 // Manage direction vectors that are already seen. Map each direction vector
209 // to an index of DepMatrix at which it is stored.
211
212 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
213 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
214 std::vector<char> Dep;
217 // Ignore Input dependencies.
218 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
219 continue;
220 // Track Output, Flow, and Anti dependencies.
221 if (auto D = DI->depends(Src, Dst)) {
222 assert(D->isOrdered() && "Expected an output, flow or anti dep.");
223 // If the direction vector is negative, normalize it to
224 // make it non-negative.
225 if (D->normalize(SE))
226 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
227 LLVM_DEBUG(StringRef DepType =
228 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
229 dbgs() << "Found " << DepType
230 << " dependency between Src and Dst\n"
231 << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
232 unsigned Levels = D->getLevels();
233 char Direction;
234 for (unsigned II = 1; II <= Levels; ++II) {
235 // `DVEntry::LE` is converted to `*`. This is because `LE` means `<`
236 // or `=`, for which we don't have an equivalent representation, so
237 // that the conservative approximation is necessary. The same goes for
238 // `DVEntry::GE`.
239 // TODO: Use of fine-grained expressions allows for more accurate
240 // analysis.
241 unsigned Dir = D->getDirection(II);
242 if (Dir == Dependence::DVEntry::LT)
243 Direction = '<';
244 else if (Dir == Dependence::DVEntry::GT)
245 Direction = '>';
246 else if (Dir == Dependence::DVEntry::EQ)
247 Direction = '=';
248 else
249 Direction = '*';
250 Dep.push_back(Direction);
251 }
252
253 // If the Dependence object doesn't have any information, fill the
254 // dependency vector with '*'.
255 if (D->isConfused()) {
256 assert(Dep.empty() && "Expected empty dependency vector");
257 Dep.assign(Level, '*');
258 }
259
260 while (Dep.size() != Level) {
261 Dep.push_back('I');
262 }
263
264 // If all the elements of any direction vector have only '*', legality
265 // can't be proven. Exit early to save compile time.
266 if (all_of(Dep, [](char C) { return C == '*'; })) {
267 ORE->emit([&]() {
268 return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
269 L->getStartLoc(), L->getHeader())
270 << "All loops have dependencies in all directions.";
271 });
272 return false;
273 }
274
275 // Test whether the dependency is forward or not.
276 bool IsKnownForward = true;
277 if (Src->getParent() != Dst->getParent()) {
278 // In general, when Src and Dst are in different BBs, the execution
279 // order of them within a single iteration is not guaranteed. Treat
280 // conservatively as not-forward dependency in this case.
281 IsKnownForward = false;
282 } else {
283 // Src and Dst are in the same BB. If they are the different
284 // instructions, Src should appear before Dst in the BB as they are
285 // stored to MemInstr in that order.
286 assert((Src == Dst || inThisOrder(Src, Dst)) &&
287 "Unexpected instructions");
288
289 // If the Dependence object is reversed (due to normalization), it
290 // represents the dependency from Dst to Src, meaning it is a backward
291 // dependency. Otherwise it should be a forward dependency.
292 bool IsReversed = D->getSrc() != Src;
293 if (IsReversed)
294 IsKnownForward = false;
295 }
296
297 // Initialize the last element. Assume forward dependencies only; it
298 // will be updated later if there is any non-forward dependency.
299 Dep.push_back('<');
300
301 // The last element should express the "summary" among one or more
302 // direction vectors whose first N elements are the same (where N is
303 // the depth of the loop nest). Hence we exclude the last element from
304 // the Seen map.
305 auto [Ite, Inserted] = Seen.try_emplace(
306 StringRef(Dep.data(), Dep.size() - 1), DepMatrix.size());
307
308 // Make sure we only add unique entries to the dependency matrix.
309 if (Inserted)
310 DepMatrix.push_back(Dep);
311
312 // If we cannot prove that this dependency is forward, change the last
313 // element of the corresponding entry. Since a `[... *]` dependency
314 // includes a `[... <]` dependency, we do not need to keep both and
315 // change the existing entry instead.
316 if (!IsKnownForward)
317 DepMatrix[Ite->second].back() = '*';
318 }
319 }
320 }
321
322 return true;
323}
324
325// A loop is moved from index 'from' to an index 'to'. Update the Dependence
326// matrix by exchanging the two columns.
327static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
328 unsigned ToIndx) {
329 for (auto &Row : DepMatrix)
330 std::swap(Row[ToIndx], Row[FromIndx]);
331}
332
333// Check if a direction vector is lexicographically positive. Return true if it
334// is positive, nullopt if it is "zero", otherwise false.
335// [Theorem] A permutation of the loops in a perfect nest is legal if and only
336// if the direction matrix, after the same permutation is applied to its
337// columns, has no ">" direction as the leftmost non-"=" direction in any row.
338static std::optional<bool>
339isLexicographicallyPositive(ArrayRef<char> DV, unsigned Begin, unsigned End) {
340 for (unsigned char Direction : DV.slice(Begin, End - Begin)) {
341 if (Direction == '<')
342 return true;
343 if (Direction == '>' || Direction == '*')
344 return false;
345 }
346 return std::nullopt;
347}
348
349// Checks if it is legal to interchange 2 loops.
350static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
351 unsigned InnerLoopId,
352 unsigned OuterLoopId) {
353 unsigned NumRows = DepMatrix.size();
354 std::vector<char> Cur;
355 // For each row check if it is valid to interchange.
356 for (unsigned Row = 0; Row < NumRows; ++Row) {
357 // Create temporary DepVector check its lexicographical order
358 // before and after swapping OuterLoop vs InnerLoop
359 Cur = DepMatrix[Row];
360
361 // If the surrounding loops already ensure that the direction vector is
362 // lexicographically positive, nothing within the loop will be able to break
363 // the dependence. In such a case we can skip the subsequent check.
364 if (isLexicographicallyPositive(Cur, 0, OuterLoopId) == true)
365 continue;
366
367 // Check if the direction vector is lexicographically positive (or zero)
368 // for both before/after exchanged. Ignore the last element because it
369 // doesn't affect the legality.
370 if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size() - 1) == false)
371 return false;
372 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
373 if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size() - 1) == false)
374 return false;
375 }
376 return true;
377}
378
379static void populateWorklist(Loop &L, LoopVector &LoopList) {
380 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
381 << L.getHeader()->getParent()->getName() << " Loop: %"
382 << L.getHeader()->getName() << '\n');
383 assert(LoopList.empty() && "LoopList should initially be empty!");
384 Loop *CurrentLoop = &L;
385 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
386 while (!Vec->empty()) {
387 // The current loop has multiple subloops in it hence it is not tightly
388 // nested.
389 // Discard all loops above it added into Worklist.
390 if (Vec->size() != 1) {
391 LoopList = {};
392 return;
393 }
394
395 LoopList.push_back(CurrentLoop);
396 CurrentLoop = Vec->front();
397 Vec = &CurrentLoop->getSubLoops();
398 }
399 LoopList.push_back(CurrentLoop);
400}
401
404 unsigned LoopNestDepth = LoopList.size();
405 if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
406 LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
407 << ", the supported range is [" << MinLoopNestDepth
408 << ", " << MaxLoopNestDepth << "].\n");
409 Loop *OuterLoop = LoopList.front();
410 ORE.emit([&]() {
411 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoopNestDepth",
412 OuterLoop->getStartLoc(),
413 OuterLoop->getHeader())
414 << "Unsupported depth of loop nest, the supported range is ["
415 << std::to_string(MinLoopNestDepth) << ", "
416 << std::to_string(MaxLoopNestDepth) << "].\n";
417 });
418 return false;
419 }
420 return true;
421}
422
424 ArrayRef<Loop *> LoopList) {
425 for (Loop *L : LoopList) {
426 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
427 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
428 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
429 return false;
430 }
431 if (L->getNumBackEdges() != 1) {
432 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
433 return false;
434 }
435 if (!L->getExitingBlock()) {
436 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
437 return false;
438 }
439 }
440 return true;
441}
442
443namespace {
444
445/// LoopInterchangeLegality checks if it is legal to interchange the loop.
446class LoopInterchangeLegality {
447public:
448 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
449 OptimizationRemarkEmitter *ORE)
450 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
451
452 /// Check if the loops can be interchanged.
453 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
454 CharMatrix &DepMatrix);
455
456 /// Discover induction PHIs in the header of \p L. Induction
457 /// PHIs are added to \p Inductions.
458 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
459
460 /// Check if the loop structure is understood. We do not handle triangular
461 /// loops for now.
462 bool isLoopStructureUnderstood();
463
464 bool currentLimitations();
465
466 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
467 return OuterInnerReductions;
468 }
469
470 const ArrayRef<PHINode *> getInnerLoopInductions() const {
471 return InnerLoopInductions;
472 }
473
474 ArrayRef<Instruction *> getHasNoWrapReductions() const {
475 return HasNoWrapReductions;
476 }
477
478private:
479 bool tightlyNested(Loop *Outer, Loop *Inner);
480 bool containsUnsafeInstructions(BasicBlock *BB);
481
482 /// Discover induction and reduction PHIs in the header of \p L. Induction
483 /// PHIs are added to \p Inductions, reductions are added to
484 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
485 /// to be passed as \p InnerLoop.
486 bool findInductionAndReductions(Loop *L,
487 SmallVector<PHINode *, 8> &Inductions,
488 Loop *InnerLoop);
489
490 Loop *OuterLoop;
491 Loop *InnerLoop;
492
493 ScalarEvolution *SE;
494
495 /// Interface to emit optimization remarks.
496 OptimizationRemarkEmitter *ORE;
497
498 /// Set of reduction PHIs taking part of a reduction across the inner and
499 /// outer loop.
500 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
501
502 /// Set of inner loop induction PHIs
503 SmallVector<PHINode *, 8> InnerLoopInductions;
504
505 /// Hold instructions that have nuw/nsw flags and involved in reductions,
506 /// like integer addition/multiplication. Those flags must be dropped when
507 /// interchanging the loops.
508 SmallVector<Instruction *, 4> HasNoWrapReductions;
509};
510
511/// Manages information utilized by the profitability check for cache. The main
512/// purpose of this class is to delay the computation of CacheCost until it is
513/// actually needed.
514class CacheCostManager {
515 Loop *OutermostLoop;
516 LoopStandardAnalysisResults *AR;
517 DependenceInfo *DI;
518
519 /// CacheCost for \ref OutermostLoop. Once it is computed, it is cached. Note
520 /// that the result can be nullptr.
521 std::optional<std::unique_ptr<CacheCost>> CC;
522
523 /// Maps each loop to an index representing the optimal position within the
524 /// loop-nest, as determined by the cache cost analysis.
525 DenseMap<const Loop *, unsigned> CostMap;
526
527 void computeIfUnitinialized();
528
529public:
530 CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
531 DependenceInfo *DI)
532 : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
533 CacheCost *getCacheCost();
534 const DenseMap<const Loop *, unsigned> &getCostMap();
535};
536
537/// LoopInterchangeProfitability checks if it is profitable to interchange the
538/// loop.
539class LoopInterchangeProfitability {
540public:
541 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
542 OptimizationRemarkEmitter *ORE)
543 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
544
545 /// Check if the loop interchange is profitable.
546 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
547 unsigned InnerLoopId, unsigned OuterLoopId,
548 CharMatrix &DepMatrix, CacheCostManager &CCM);
549
550private:
551 int getInstrOrderCost();
552 std::optional<bool> isProfitablePerLoopCacheAnalysis(
553 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
554 std::optional<bool> isProfitablePerInstrOrderCost();
555 std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
556 unsigned OuterLoopId,
557 CharMatrix &DepMatrix);
558 Loop *OuterLoop;
559 Loop *InnerLoop;
560
561 /// Scev analysis.
562 ScalarEvolution *SE;
563
564 /// Interface to emit optimization remarks.
565 OptimizationRemarkEmitter *ORE;
566};
567
568/// LoopInterchangeTransform interchanges the loop.
569class LoopInterchangeTransform {
570public:
571 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
572 LoopInfo *LI, DominatorTree *DT,
573 const LoopInterchangeLegality &LIL)
574 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
575
576 /// Interchange OuterLoop and InnerLoop.
577 bool transform(ArrayRef<Instruction *> DropNoWrapInsts);
578 void restructureLoops(Loop *NewInner, Loop *NewOuter,
579 BasicBlock *OrigInnerPreHeader,
580 BasicBlock *OrigOuterPreHeader);
581 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
582
583private:
584 bool adjustLoopLinks();
585 bool adjustLoopBranches();
586
587 Loop *OuterLoop;
588 Loop *InnerLoop;
589
590 /// Scev analysis.
591 ScalarEvolution *SE;
592
593 LoopInfo *LI;
594 DominatorTree *DT;
595
596 const LoopInterchangeLegality &LIL;
597};
598
599struct LoopInterchange {
600 ScalarEvolution *SE = nullptr;
601 LoopInfo *LI = nullptr;
602 DependenceInfo *DI = nullptr;
603 DominatorTree *DT = nullptr;
604 LoopStandardAnalysisResults *AR = nullptr;
605
606 /// Interface to emit optimization remarks.
607 OptimizationRemarkEmitter *ORE;
608
609 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
610 DominatorTree *DT, LoopStandardAnalysisResults *AR,
611 OptimizationRemarkEmitter *ORE)
612 : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
613
614 bool run(Loop *L) {
615 if (L->getParentLoop())
616 return false;
617 SmallVector<Loop *, 8> LoopList;
618 populateWorklist(*L, LoopList);
619 return processLoopList(LoopList);
620 }
621
622 bool run(LoopNest &LN) {
623 SmallVector<Loop *, 8> LoopList(LN.getLoops());
624 for (unsigned I = 1; I < LoopList.size(); ++I)
625 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
626 return false;
627 return processLoopList(LoopList);
628 }
629
630 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
631 // TODO: Add a better heuristic to select the loop to be interchanged based
632 // on the dependence matrix. Currently we select the innermost loop.
633 return LoopList.size() - 1;
634 }
635
636 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
637 bool Changed = false;
638
639 // Ensure proper loop nest depth.
640 assert(hasSupportedLoopDepth(LoopList, *ORE) &&
641 "Unsupported depth of loop nest.");
642
643 unsigned LoopNestDepth = LoopList.size();
644
645 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
646 << "\n");
647
648 CharMatrix DependencyMatrix;
649 Loop *OuterMostLoop = *(LoopList.begin());
650 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
651 OuterMostLoop, DI, SE, ORE)) {
652 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
653 return false;
654 }
655
656 LLVM_DEBUG(dbgs() << "Dependency matrix before interchange:\n";
657 printDepMatrix(DependencyMatrix));
658
659 // Get the Outermost loop exit.
660 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
661 if (!LoopNestExit) {
662 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
663 return false;
664 }
665
666 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
667 CacheCostManager CCM(LoopList[0], AR, DI);
668 // We try to achieve the globally optimal memory access for the loopnest,
669 // and do interchange based on a bubble-sort fasion. We start from
670 // the innermost loop, move it outwards to the best possible position
671 // and repeat this process.
672 for (unsigned j = SelecLoopId; j > 0; j--) {
673 bool ChangedPerIter = false;
674 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
675 bool Interchanged =
676 processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
677 ChangedPerIter |= Interchanged;
678 Changed |= Interchanged;
679 }
680 // Early abort if there was no interchange during an entire round of
681 // moving loops outwards.
682 if (!ChangedPerIter)
683 break;
684 }
685 return Changed;
686 }
687
688 bool processLoop(SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
689 unsigned OuterLoopId,
690 std::vector<std::vector<char>> &DependencyMatrix,
691 CacheCostManager &CCM) {
692 Loop *OuterLoop = LoopList[OuterLoopId];
693 Loop *InnerLoop = LoopList[InnerLoopId];
694 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
695 << " and OuterLoopId = " << OuterLoopId << "\n");
696 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
697 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
698 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
699 return false;
700 }
701 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
702 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
703 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
704 DependencyMatrix, CCM)) {
705 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
706 return false;
707 }
708
709 ORE->emit([&]() {
710 return OptimizationRemark(DEBUG_TYPE, "Interchanged",
711 InnerLoop->getStartLoc(),
712 InnerLoop->getHeader())
713 << "Loop interchanged with enclosing loop.";
714 });
715
716 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
717 LIT.transform(LIL.getHasNoWrapReductions());
718 LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
719 LoopsInterchanged++;
720
721 llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE);
722
723 // Loops interchanged, update LoopList accordingly.
724 std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
725 // Update the DependencyMatrix
726 interChangeDependencies(DependencyMatrix, InnerLoopId, OuterLoopId);
727
728 LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
729 printDepMatrix(DependencyMatrix));
730
731 return true;
732 }
733};
734
735} // end anonymous namespace
736
737bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
738 return any_of(*BB, [](const Instruction &I) {
739 return I.mayHaveSideEffects() || I.mayReadFromMemory();
740 });
741}
742
743bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
744 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
745 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
746 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
747
748 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
749
750 // A perfectly nested loop will not have any branch in between the outer and
751 // inner block i.e. outer header will branch to either inner preheader and
752 // outerloop latch.
753 BranchInst *OuterLoopHeaderBI =
754 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
755 if (!OuterLoopHeaderBI)
756 return false;
757
758 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
759 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
760 Succ != OuterLoopLatch)
761 return false;
762
763 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
764 // We do not have any basic block in between now make sure the outer header
765 // and outer loop latch doesn't contain any unsafe instructions.
766 if (containsUnsafeInstructions(OuterLoopHeader) ||
767 containsUnsafeInstructions(OuterLoopLatch))
768 return false;
769
770 // Also make sure the inner loop preheader does not contain any unsafe
771 // instructions. Note that all instructions in the preheader will be moved to
772 // the outer loop header when interchanging.
773 if (InnerLoopPreHeader != OuterLoopHeader &&
774 containsUnsafeInstructions(InnerLoopPreHeader))
775 return false;
776
777 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
778 // Ensure the inner loop exit block flows to the outer loop latch possibly
779 // through empty blocks.
780 const BasicBlock &SuccInner =
781 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
782 if (&SuccInner != OuterLoopLatch) {
783 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
784 << " does not lead to the outer loop latch.\n";);
785 return false;
786 }
787 // The inner loop exit block does flow to the outer loop latch and not some
788 // other BBs, now make sure it contains safe instructions, since it will be
789 // moved into the (new) inner loop after interchange.
790 if (containsUnsafeInstructions(InnerLoopExit))
791 return false;
792
793 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
794 // We have a perfect loop nest.
795 return true;
796}
797
798bool LoopInterchangeLegality::isLoopStructureUnderstood() {
799 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
800 for (PHINode *InnerInduction : InnerLoopInductions) {
801 unsigned Num = InnerInduction->getNumOperands();
802 for (unsigned i = 0; i < Num; ++i) {
803 Value *Val = InnerInduction->getOperand(i);
804 if (isa<Constant>(Val))
805 continue;
807 if (!I)
808 return false;
809 // TODO: Handle triangular loops.
810 // e.g. for(int i=0;i<N;i++)
811 // for(int j=i;j<N;j++)
812 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
813 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
814 InnerLoopPreheader &&
815 !OuterLoop->isLoopInvariant(I)) {
816 return false;
817 }
818 }
819 }
820
821 // TODO: Handle triangular loops of another form.
822 // e.g. for(int i=0;i<N;i++)
823 // for(int j=0;j<i;j++)
824 // or,
825 // for(int i=0;i<N;i++)
826 // for(int j=0;j*i<N;j++)
827 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
828 BranchInst *InnerLoopLatchBI =
829 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
830 if (!InnerLoopLatchBI->isConditional())
831 return false;
832 if (CmpInst *InnerLoopCmp =
833 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
834 Value *Op0 = InnerLoopCmp->getOperand(0);
835 Value *Op1 = InnerLoopCmp->getOperand(1);
836
837 // LHS and RHS of the inner loop exit condition, e.g.,
838 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
839 Value *Left = nullptr;
840 Value *Right = nullptr;
841
842 // Check if V only involves inner loop induction variable.
843 // Return true if V is InnerInduction, or a cast from
844 // InnerInduction, or a binary operator that involves
845 // InnerInduction and a constant.
846 std::function<bool(Value *)> IsPathToInnerIndVar;
847 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
848 if (llvm::is_contained(InnerLoopInductions, V))
849 return true;
850 if (isa<Constant>(V))
851 return true;
853 if (!I)
854 return false;
855 if (isa<CastInst>(I))
856 return IsPathToInnerIndVar(I->getOperand(0));
858 return IsPathToInnerIndVar(I->getOperand(0)) &&
859 IsPathToInnerIndVar(I->getOperand(1));
860 return false;
861 };
862
863 // In case of multiple inner loop indvars, it is okay if LHS and RHS
864 // are both inner indvar related variables.
865 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
866 return true;
867
868 // Otherwise we check if the cmp instruction compares an inner indvar
869 // related variable (Left) with a outer loop invariant (Right).
870 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
871 Left = Op0;
872 Right = Op1;
873 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
874 Left = Op1;
875 Right = Op0;
876 }
877
878 if (Left == nullptr)
879 return false;
880
881 const SCEV *S = SE->getSCEV(Right);
882 if (!SE->isLoopInvariant(S, OuterLoop))
883 return false;
884 }
885
886 return true;
887}
888
889// If SV is a LCSSA PHI node with a single incoming value, return the incoming
890// value.
891static Value *followLCSSA(Value *SV) {
893 if (!PHI)
894 return SV;
895
896 if (PHI->getNumIncomingValues() != 1)
897 return SV;
898 return followLCSSA(PHI->getIncomingValue(0));
899}
900
901// Check V's users to see if it is involved in a reduction in L.
902static PHINode *
904 SmallVectorImpl<Instruction *> &HasNoWrapInsts) {
905 // Reduction variables cannot be constants.
906 if (isa<Constant>(V))
907 return nullptr;
908
909 for (Value *User : V->users()) {
911 if (PHI->getNumIncomingValues() == 1)
912 continue;
915 // Detect floating point reduction only when it can be reordered.
916 if (RD.getExactFPMathInst() != nullptr)
917 return nullptr;
918
920 switch (RK) {
921 case RecurKind::Or:
922 case RecurKind::And:
923 case RecurKind::Xor:
924 case RecurKind::SMin:
925 case RecurKind::SMax:
926 case RecurKind::UMin:
927 case RecurKind::UMax:
928 case RecurKind::FAdd:
929 case RecurKind::FMul:
930 case RecurKind::FMin:
931 case RecurKind::FMax:
937 case RecurKind::AnyOf:
938 return PHI;
939
940 // Change the order of integer addition/multiplication may change the
941 // semantics. Consider the following case:
942 //
943 // int A[2][2] = {{ INT_MAX, INT_MAX }, { INT_MIN, INT_MIN }};
944 // int sum = 0;
945 // for (int i = 0; i < 2; i++)
946 // for (int j = 0; j < 2; j++)
947 // sum += A[j][i];
948 //
949 // If the above loops are exchanged, the addition will cause an
950 // overflow. To prevent this, we must drop the nuw/nsw flags from the
951 // addition/multiplication instructions when we actually exchanges the
952 // loops.
953 case RecurKind::Add:
954 case RecurKind::Mul: {
955 unsigned OpCode = RecurrenceDescriptor::getOpcode(RK);
957
958 // Bail out when we fail to collect reduction instructions chain.
959 if (Ops.empty())
960 return nullptr;
961
962 for (Instruction *I : Ops) {
963 assert(I->getOpcode() == OpCode &&
964 "Expected the instruction to be the reduction operation");
965 (void)OpCode;
966
967 // If the instruction has nuw/nsw flags, we must drop them when the
968 // transformation is actually performed.
969 if (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())
970 HasNoWrapInsts.push_back(I);
971 }
972 return PHI;
973 }
974
975 default:
976 return nullptr;
977 }
978 }
979 return nullptr;
980 }
981 }
982
983 return nullptr;
984}
985
986bool LoopInterchangeLegality::findInductionAndReductions(
987 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
988 if (!L->getLoopLatch() || !L->getLoopPredecessor())
989 return false;
990 for (PHINode &PHI : L->getHeader()->phis()) {
991 InductionDescriptor ID;
993 Inductions.push_back(&PHI);
994 else {
995 // PHIs in inner loops need to be part of a reduction in the outer loop,
996 // discovered when checking the PHIs of the outer loop earlier.
997 if (!InnerLoop) {
998 if (!OuterInnerReductions.count(&PHI)) {
999 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
1000 "across the outer loop.\n");
1001 return false;
1002 }
1003 } else {
1004 assert(PHI.getNumIncomingValues() == 2 &&
1005 "Phis in loop header should have exactly 2 incoming values");
1006 // Check if we have a PHI node in the outer loop that has a reduction
1007 // result from the inner loop as an incoming value.
1008 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
1009 PHINode *InnerRedPhi =
1010 findInnerReductionPhi(InnerLoop, V, HasNoWrapReductions);
1011 if (!InnerRedPhi ||
1012 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
1013 LLVM_DEBUG(
1014 dbgs()
1015 << "Failed to recognize PHI as an induction or reduction.\n");
1016 return false;
1017 }
1018 OuterInnerReductions.insert(&PHI);
1019 OuterInnerReductions.insert(InnerRedPhi);
1020 }
1021 }
1022 }
1023 return true;
1024}
1025
1026// This function indicates the current limitations in the transform as a result
1027// of which we do not proceed.
1028bool LoopInterchangeLegality::currentLimitations() {
1029 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1030
1031 // transform currently expects the loop latches to also be the exiting
1032 // blocks.
1033 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
1034 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
1035 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
1036 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
1037 LLVM_DEBUG(
1038 dbgs() << "Loops where the latch is not the exiting block are not"
1039 << " supported currently.\n");
1040 ORE->emit([&]() {
1041 return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
1042 OuterLoop->getStartLoc(),
1043 OuterLoop->getHeader())
1044 << "Loops where the latch is not the exiting block cannot be"
1045 " interchange currently.";
1046 });
1047 return true;
1048 }
1049
1050 SmallVector<PHINode *, 8> Inductions;
1051 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
1052 LLVM_DEBUG(
1053 dbgs() << "Only outer loops with induction or reduction PHI nodes "
1054 << "are supported currently.\n");
1055 ORE->emit([&]() {
1056 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
1057 OuterLoop->getStartLoc(),
1058 OuterLoop->getHeader())
1059 << "Only outer loops with induction or reduction PHI nodes can be"
1060 " interchanged currently.";
1061 });
1062 return true;
1063 }
1064
1065 Inductions.clear();
1066 // For multi-level loop nests, make sure that all phi nodes for inner loops
1067 // at all levels can be recognized as a induction or reduction phi. Bail out
1068 // if a phi node at a certain nesting level cannot be properly recognized.
1069 Loop *CurLevelLoop = OuterLoop;
1070 while (!CurLevelLoop->getSubLoops().empty()) {
1071 // We already made sure that the loop nest is tightly nested.
1072 CurLevelLoop = CurLevelLoop->getSubLoops().front();
1073 if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
1074 LLVM_DEBUG(
1075 dbgs() << "Only inner loops with induction or reduction PHI nodes "
1076 << "are supported currently.\n");
1077 ORE->emit([&]() {
1078 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
1079 CurLevelLoop->getStartLoc(),
1080 CurLevelLoop->getHeader())
1081 << "Only inner loops with induction or reduction PHI nodes can be"
1082 " interchange currently.";
1083 });
1084 return true;
1085 }
1086 }
1087
1088 // TODO: Triangular loops are not handled for now.
1089 if (!isLoopStructureUnderstood()) {
1090 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
1091 ORE->emit([&]() {
1092 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
1093 InnerLoop->getStartLoc(),
1094 InnerLoop->getHeader())
1095 << "Inner loop structure not understood currently.";
1096 });
1097 return true;
1098 }
1099
1100 return false;
1101}
1102
1103bool LoopInterchangeLegality::findInductions(
1104 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
1105 for (PHINode &PHI : L->getHeader()->phis()) {
1106 InductionDescriptor ID;
1108 Inductions.push_back(&PHI);
1109 }
1110 return !Inductions.empty();
1111}
1112
1113// We currently only support LCSSA PHI nodes in the inner loop exit, if their
1114// users are either reduction PHIs or PHIs outside the outer loop (which means
1115// the we are only interested in the final value after the loop).
1116static bool
1119 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
1120 for (PHINode &PHI : InnerExit->phis()) {
1121 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
1122 if (PHI.getNumIncomingValues() > 1)
1123 return false;
1124 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
1125 PHINode *PN = dyn_cast<PHINode>(U);
1126 return !PN ||
1127 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
1128 })) {
1129 return false;
1130 }
1131 }
1132 return true;
1133}
1134
1135// We currently support LCSSA PHI nodes in the outer loop exit, if their
1136// incoming values do not come from the outer loop latch or if the
1137// outer loop latch has a single predecessor. In that case, the value will
1138// be available if both the inner and outer loop conditions are true, which
1139// will still be true after interchanging. If we have multiple predecessor,
1140// that may not be the case, e.g. because the outer loop latch may be executed
1141// if the inner loop is not executed.
1142static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
1143 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
1144 for (PHINode &PHI : LoopNestExit->phis()) {
1145 for (Value *Incoming : PHI.incoming_values()) {
1147 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
1148 continue;
1149
1150 // The incoming value is defined in the outer loop latch. Currently we
1151 // only support that in case the outer loop latch has a single predecessor.
1152 // This guarantees that the outer loop latch is executed if and only if
1153 // the inner loop is executed (because tightlyNested() guarantees that the
1154 // outer loop header only branches to the inner loop or the outer loop
1155 // latch).
1156 // FIXME: We could weaken this logic and allow multiple predecessors,
1157 // if the values are produced outside the loop latch. We would need
1158 // additional logic to update the PHI nodes in the exit block as
1159 // well.
1160 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
1161 return false;
1162 }
1163 }
1164 return true;
1165}
1166
1167// In case of multi-level nested loops, it may occur that lcssa phis exist in
1168// the latch of InnerLoop, i.e., when defs of the incoming values are further
1169// inside the loopnest. Sometimes those incoming values are not available
1170// after interchange, since the original inner latch will become the new outer
1171// latch which may have predecessor paths that do not include those incoming
1172// values.
1173// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
1174// multi-level loop nests.
1175static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
1176 if (InnerLoop->getSubLoops().empty())
1177 return true;
1178 // If the original outer latch has only one predecessor, then values defined
1179 // further inside the looploop, e.g., in the innermost loop, will be available
1180 // at the new outer latch after interchange.
1181 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
1182 return true;
1183
1184 // The outer latch has more than one predecessors, i.e., the inner
1185 // exit and the inner header.
1186 // PHI nodes in the inner latch are lcssa phis where the incoming values
1187 // are defined further inside the loopnest. Check if those phis are used
1188 // in the original inner latch. If that is the case then bail out since
1189 // those incoming values may not be available at the new outer latch.
1190 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1191 for (PHINode &PHI : InnerLoopLatch->phis()) {
1192 for (auto *U : PHI.users()) {
1194 if (InnerLoopLatch == UI->getParent())
1195 return false;
1196 }
1197 }
1198 return true;
1199}
1200
1201bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
1202 unsigned OuterLoopId,
1203 CharMatrix &DepMatrix) {
1204 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
1205 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
1206 << " and OuterLoopId = " << OuterLoopId
1207 << " due to dependence\n");
1208 ORE->emit([&]() {
1209 return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
1210 InnerLoop->getStartLoc(),
1211 InnerLoop->getHeader())
1212 << "Cannot interchange loops due to dependences.";
1213 });
1214 return false;
1215 }
1216 // Check if outer and inner loop contain legal instructions only.
1217 for (auto *BB : OuterLoop->blocks())
1218 for (Instruction &I : BB->instructionsWithoutDebug())
1219 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1220 // readnone functions do not prevent interchanging.
1221 if (CI->onlyWritesMemory())
1222 continue;
1223 LLVM_DEBUG(
1224 dbgs() << "Loops with call instructions cannot be interchanged "
1225 << "safely.");
1226 ORE->emit([&]() {
1227 return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
1228 CI->getDebugLoc(),
1229 CI->getParent())
1230 << "Cannot interchange loops due to call instruction.";
1231 });
1232
1233 return false;
1234 }
1235
1236 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1237 LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n");
1238 return false;
1239 }
1240
1241 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
1242 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
1243 ORE->emit([&]() {
1244 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
1245 InnerLoop->getStartLoc(),
1246 InnerLoop->getHeader())
1247 << "Cannot interchange loops because unsupported PHI nodes found "
1248 "in inner loop latch.";
1249 });
1250 return false;
1251 }
1252
1253 // TODO: The loops could not be interchanged due to current limitations in the
1254 // transform module.
1255 if (currentLimitations()) {
1256 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1257 return false;
1258 }
1259
1260 // Check if the loops are tightly nested.
1261 if (!tightlyNested(OuterLoop, InnerLoop)) {
1262 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1263 ORE->emit([&]() {
1264 return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1265 InnerLoop->getStartLoc(),
1266 InnerLoop->getHeader())
1267 << "Cannot interchange loops because they are not tightly "
1268 "nested.";
1269 });
1270 return false;
1271 }
1272
1273 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1274 OuterInnerReductions)) {
1275 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1276 ORE->emit([&]() {
1277 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1278 InnerLoop->getStartLoc(),
1279 InnerLoop->getHeader())
1280 << "Found unsupported PHI node in loop exit.";
1281 });
1282 return false;
1283 }
1284
1285 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1286 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1287 ORE->emit([&]() {
1288 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1289 OuterLoop->getStartLoc(),
1290 OuterLoop->getHeader())
1291 << "Found unsupported PHI node in loop exit.";
1292 });
1293 return false;
1294 }
1295
1296 return true;
1297}
1298
1299void CacheCostManager::computeIfUnitinialized() {
1300 if (CC.has_value())
1301 return;
1302
1303 LLVM_DEBUG(dbgs() << "Compute CacheCost.\n");
1304 CC = CacheCost::getCacheCost(*OutermostLoop, *AR, *DI);
1305 // Obtain the loop vector returned from loop cache analysis beforehand,
1306 // and put each <Loop, index> pair into a map for constant time query
1307 // later. Indices in loop vector reprsent the optimal order of the
1308 // corresponding loop, e.g., given a loopnest with depth N, index 0
1309 // indicates the loop should be placed as the outermost loop and index N
1310 // indicates the loop should be placed as the innermost loop.
1311 //
1312 // For the old pass manager CacheCost would be null.
1313 if (*CC != nullptr)
1314 for (const auto &[Idx, Cost] : enumerate((*CC)->getLoopCosts()))
1315 CostMap[Cost.first] = Idx;
1316}
1317
1318CacheCost *CacheCostManager::getCacheCost() {
1319 computeIfUnitinialized();
1320 return CC->get();
1321}
1322
1323const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
1324 computeIfUnitinialized();
1325 return CostMap;
1326}
1327
1328int LoopInterchangeProfitability::getInstrOrderCost() {
1329 unsigned GoodOrder, BadOrder;
1330 BadOrder = GoodOrder = 0;
1331 for (BasicBlock *BB : InnerLoop->blocks()) {
1332 for (Instruction &Ins : *BB) {
1333 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1334 bool FoundInnerInduction = false;
1335 bool FoundOuterInduction = false;
1336 for (Value *Op : GEP->operands()) {
1337 // Skip operands that are not SCEV-able.
1338 if (!SE->isSCEVable(Op->getType()))
1339 continue;
1340
1341 const SCEV *OperandVal = SE->getSCEV(Op);
1342 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1343 if (!AR)
1344 continue;
1345
1346 // If we find the inner induction after an outer induction e.g.
1347 // for(int i=0;i<N;i++)
1348 // for(int j=0;j<N;j++)
1349 // A[i][j] = A[i-1][j-1]+k;
1350 // then it is a good order.
1351 if (AR->getLoop() == InnerLoop) {
1352 // We found an InnerLoop induction after OuterLoop induction. It is
1353 // a good order.
1354 FoundInnerInduction = true;
1355 if (FoundOuterInduction) {
1356 GoodOrder++;
1357 break;
1358 }
1359 }
1360 // If we find the outer induction after an inner induction e.g.
1361 // for(int i=0;i<N;i++)
1362 // for(int j=0;j<N;j++)
1363 // A[j][i] = A[j-1][i-1]+k;
1364 // then it is a bad order.
1365 if (AR->getLoop() == OuterLoop) {
1366 // We found an OuterLoop induction after InnerLoop induction. It is
1367 // a bad order.
1368 FoundOuterInduction = true;
1369 if (FoundInnerInduction) {
1370 BadOrder++;
1371 break;
1372 }
1373 }
1374 }
1375 }
1376 }
1377 }
1378 return GoodOrder - BadOrder;
1379}
1380
1381std::optional<bool>
1382LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1383 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
1384 // This is the new cost model returned from loop cache analysis.
1385 // A smaller index means the loop should be placed an outer loop, and vice
1386 // versa.
1387 auto InnerLoopIt = CostMap.find(InnerLoop);
1388 if (InnerLoopIt == CostMap.end())
1389 return std::nullopt;
1390 auto OuterLoopIt = CostMap.find(OuterLoop);
1391 if (OuterLoopIt == CostMap.end())
1392 return std::nullopt;
1393
1394 if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop))
1395 return std::nullopt;
1396 unsigned InnerIndex = InnerLoopIt->second;
1397 unsigned OuterIndex = OuterLoopIt->second;
1398 LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
1399 << ", OuterIndex = " << OuterIndex << "\n");
1400 assert(InnerIndex != OuterIndex && "CostMap should assign unique "
1401 "numbers to each loop");
1402 return std::optional<bool>(InnerIndex < OuterIndex);
1403}
1404
1405std::optional<bool>
1406LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1407 // Legacy cost model: this is rough cost estimation algorithm. It counts the
1408 // good and bad order of induction variables in the instruction and allows
1409 // reordering if number of bad orders is more than good.
1410 int Cost = getInstrOrderCost();
1411 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1413 return std::optional<bool>(true);
1414
1415 return std::nullopt;
1416}
1417
1418/// Return true if we can vectorize the loop specified by \p LoopId.
1419static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) {
1420 for (const auto &Dep : DepMatrix) {
1421 char Dir = Dep[LoopId];
1422 char DepType = Dep.back();
1423 assert((DepType == '<' || DepType == '*') &&
1424 "Unexpected element in dependency vector");
1425
1426 // There are no loop-carried dependencies.
1427 if (Dir == '=' || Dir == 'I')
1428 continue;
1429
1430 // DepType being '<' means that this direction vector represents a forward
1431 // dependency. In principle, a loop with '<' direction can be vectorized in
1432 // this case.
1433 if (Dir == '<' && DepType == '<')
1434 continue;
1435
1436 // We cannot prove that the loop is vectorizable.
1437 return false;
1438 }
1439 return true;
1440}
1441
1442std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1443 unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1444 // If the outer loop cannot be vectorized, it is not profitable to move this
1445 // to inner position.
1446 if (!canVectorize(DepMatrix, OuterLoopId))
1447 return false;
1448
1449 // If the inner loop cannot be vectorized but the outer loop can be, then it
1450 // is profitable to interchange to enable inner loop parallelism.
1451 if (!canVectorize(DepMatrix, InnerLoopId))
1452 return true;
1453
1454 // If both the inner and the outer loop can be vectorized, it is necessary to
1455 // check the cost of each vectorized loop for profitability decision. At this
1456 // time we do not have a cost model to estimate them, so return nullopt.
1457 // TODO: Estimate the cost of vectorized loop when both the outer and the
1458 // inner loop can be vectorized.
1459 return std::nullopt;
1460}
1461
1462bool LoopInterchangeProfitability::isProfitable(
1463 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1464 unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1465
1466 // Return true if interchange is forced and the cost-model ignored.
1467 if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
1468 return true;
1470 "Duplicate rules and option 'ignore' are not allowed");
1471
1472 // isProfitable() is structured to avoid endless loop interchange. If the
1473 // highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
1474 // decide the profitability then, profitability check will stop and return the
1475 // analysis result. If it failed to determine it (e.g., cache analysis failed
1476 // to analyze the loopnest due to delinearization issues) then go ahead the
1477 // second highest priority rule (isProfitablePerInstrOrderCost by default).
1478 // Likewise, if it failed to analysis the profitability then only, the last
1479 // rule (isProfitableForVectorization by default) will decide.
1480 std::optional<bool> shouldInterchange;
1481 for (RuleTy RT : Profitabilities) {
1482 switch (RT) {
1483 case RuleTy::PerLoopCacheAnalysis: {
1484 CacheCost *CC = CCM.getCacheCost();
1485 const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
1486 shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
1487 break;
1488 }
1489 case RuleTy::PerInstrOrderCost:
1490 shouldInterchange = isProfitablePerInstrOrderCost();
1491 break;
1492 case RuleTy::ForVectorization:
1493 shouldInterchange =
1494 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1495 break;
1496 case RuleTy::Ignore:
1497 llvm_unreachable("Option 'ignore' is not supported with other options");
1498 break;
1499 }
1500
1501 // If this rule could determine the profitability, don't call subsequent
1502 // rules.
1503 if (shouldInterchange.has_value())
1504 break;
1505 }
1506
1507 if (!shouldInterchange.has_value()) {
1508 ORE->emit([&]() {
1509 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1510 InnerLoop->getStartLoc(),
1511 InnerLoop->getHeader())
1512 << "Insufficient information to calculate the cost of loop for "
1513 "interchange.";
1514 });
1515 return false;
1516 } else if (!shouldInterchange.value()) {
1517 ORE->emit([&]() {
1518 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1519 InnerLoop->getStartLoc(),
1520 InnerLoop->getHeader())
1521 << "Interchanging loops is not considered to improve cache "
1522 "locality nor vectorization.";
1523 });
1524 return false;
1525 }
1526 return true;
1527}
1528
1529void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1530 Loop *InnerLoop) {
1531 for (Loop *L : *OuterLoop)
1532 if (L == InnerLoop) {
1533 OuterLoop->removeChildLoop(L);
1534 return;
1535 }
1536 llvm_unreachable("Couldn't find loop");
1537}
1538
1539/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1540/// new inner and outer loop after interchanging: NewInner is the original
1541/// outer loop and NewOuter is the original inner loop.
1542///
1543/// Before interchanging, we have the following structure
1544/// Outer preheader
1545// Outer header
1546// Inner preheader
1547// Inner header
1548// Inner body
1549// Inner latch
1550// outer bbs
1551// Outer latch
1552//
1553// After interchanging:
1554// Inner preheader
1555// Inner header
1556// Outer preheader
1557// Outer header
1558// Inner body
1559// outer bbs
1560// Outer latch
1561// Inner latch
1562void LoopInterchangeTransform::restructureLoops(
1563 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1564 BasicBlock *OrigOuterPreHeader) {
1565 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1566 // The original inner loop preheader moves from the new inner loop to
1567 // the parent loop, if there is one.
1568 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1569 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1570
1571 // Switch the loop levels.
1572 if (OuterLoopParent) {
1573 // Remove the loop from its parent loop.
1574 removeChildLoop(OuterLoopParent, NewInner);
1575 removeChildLoop(NewInner, NewOuter);
1576 OuterLoopParent->addChildLoop(NewOuter);
1577 } else {
1578 removeChildLoop(NewInner, NewOuter);
1579 LI->changeTopLevelLoop(NewInner, NewOuter);
1580 }
1581 while (!NewOuter->isInnermost())
1582 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1583 NewOuter->addChildLoop(NewInner);
1584
1585 // BBs from the original inner loop.
1586 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1587
1588 // Add BBs from the original outer loop to the original inner loop (excluding
1589 // BBs already in inner loop)
1590 for (BasicBlock *BB : NewInner->blocks())
1591 if (LI->getLoopFor(BB) == NewInner)
1592 NewOuter->addBlockEntry(BB);
1593
1594 // Now remove inner loop header and latch from the new inner loop and move
1595 // other BBs (the loop body) to the new inner loop.
1596 BasicBlock *OuterHeader = NewOuter->getHeader();
1597 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1598 for (BasicBlock *BB : OrigInnerBBs) {
1599 // Nothing will change for BBs in child loops.
1600 if (LI->getLoopFor(BB) != NewOuter)
1601 continue;
1602 // Remove the new outer loop header and latch from the new inner loop.
1603 if (BB == OuterHeader || BB == OuterLatch)
1604 NewInner->removeBlockFromLoop(BB);
1605 else
1606 LI->changeLoopFor(BB, NewInner);
1607 }
1608
1609 // The preheader of the original outer loop becomes part of the new
1610 // outer loop.
1611 NewOuter->addBlockEntry(OrigOuterPreHeader);
1612 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1613
1614 // Tell SE that we move the loops around.
1615 SE->forgetLoop(NewOuter);
1616}
1617
1618bool LoopInterchangeTransform::transform(
1619 ArrayRef<Instruction *> DropNoWrapInsts) {
1620 bool Transformed = false;
1621
1622 if (InnerLoop->getSubLoops().empty()) {
1623 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1624 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1625 auto &InductionPHIs = LIL.getInnerLoopInductions();
1626 if (InductionPHIs.empty()) {
1627 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1628 return false;
1629 }
1630
1631 SmallVector<Instruction *, 8> InnerIndexVarList;
1632 for (PHINode *CurInductionPHI : InductionPHIs) {
1633 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1634 InnerIndexVarList.push_back(
1635 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1636 else
1637 InnerIndexVarList.push_back(
1638 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1639 }
1640
1641 // Create a new latch block for the inner loop. We split at the
1642 // current latch's terminator and then move the condition and all
1643 // operands that are not either loop-invariant or the induction PHI into the
1644 // new latch block.
1645 BasicBlock *NewLatch =
1646 SplitBlock(InnerLoop->getLoopLatch(),
1647 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1648
1649 SmallSetVector<Instruction *, 4> WorkList;
1650 unsigned i = 0;
1651 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1652 for (; i < WorkList.size(); i++) {
1653 // Duplicate instruction and move it the new latch. Update uses that
1654 // have been moved.
1655 Instruction *NewI = WorkList[i]->clone();
1656 NewI->insertBefore(NewLatch->getFirstNonPHIIt());
1657 assert(!NewI->mayHaveSideEffects() &&
1658 "Moving instructions with side-effects may change behavior of "
1659 "the loop nest!");
1660 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1661 Instruction *UserI = cast<Instruction>(U.getUser());
1662 if (!InnerLoop->contains(UserI->getParent()) ||
1663 UserI->getParent() == NewLatch ||
1664 llvm::is_contained(InductionPHIs, UserI))
1665 U.set(NewI);
1666 }
1667 // Add operands of moved instruction to the worklist, except if they are
1668 // outside the inner loop or are the induction PHI.
1669 for (Value *Op : WorkList[i]->operands()) {
1671 if (!OpI ||
1672 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1673 llvm::is_contained(InductionPHIs, OpI))
1674 continue;
1675 WorkList.insert(OpI);
1676 }
1677 }
1678 };
1679
1680 // FIXME: Should we interchange when we have a constant condition?
1683 ->getCondition());
1684 if (CondI)
1685 WorkList.insert(CondI);
1686 MoveInstructions();
1687 for (Instruction *InnerIndexVar : InnerIndexVarList)
1688 WorkList.insert(cast<Instruction>(InnerIndexVar));
1689 MoveInstructions();
1690 }
1691
1692 // Ensure the inner loop phi nodes have a separate basic block.
1693 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1694 if (&*InnerLoopHeader->getFirstNonPHIIt() !=
1695 InnerLoopHeader->getTerminator()) {
1696 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHIIt(), DT, LI);
1697 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1698 }
1699
1700 // Instructions in the original inner loop preheader may depend on values
1701 // defined in the outer loop header. Move them there, because the original
1702 // inner loop preheader will become the entry into the interchanged loop nest.
1703 // Currently we move all instructions and rely on LICM to move invariant
1704 // instructions outside the loop nest.
1705 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1706 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1707 if (InnerLoopPreHeader != OuterLoopHeader) {
1708 for (Instruction &I :
1709 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1710 std::prev(InnerLoopPreHeader->end()))))
1711 I.moveBeforePreserving(OuterLoopHeader->getTerminator()->getIterator());
1712 }
1713
1714 Transformed |= adjustLoopLinks();
1715 if (!Transformed) {
1716 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1717 return false;
1718 }
1719
1720 // Finally, drop the nsw/nuw flags from the instructions for reduction
1721 // calculations.
1722 for (Instruction *Reduction : DropNoWrapInsts) {
1723 Reduction->setHasNoSignedWrap(false);
1724 Reduction->setHasNoUnsignedWrap(false);
1725 }
1726
1727 return true;
1728}
1729
1730/// \brief Move all instructions except the terminator from FromBB right before
1731/// InsertBefore
1732static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1733 BasicBlock *ToBB = InsertBefore->getParent();
1734
1735 ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(),
1736 FromBB->getTerminator()->getIterator());
1737}
1738
1739/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1740static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1741 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1742 // from BB1 afterwards.
1743 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1744 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1745 for (Instruction *I : TempInstrs)
1746 I->removeFromParent();
1747
1748 // Move instructions from BB2 to BB1.
1749 moveBBContents(BB2, BB1->getTerminator());
1750
1751 // Move instructions from TempInstrs to BB2.
1752 for (Instruction *I : TempInstrs)
1753 I->insertBefore(BB2->getTerminator()->getIterator());
1754}
1755
1756// Update BI to jump to NewBB instead of OldBB. Records updates to the
1757// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1758// \p OldBB is exactly once in BI's successor list.
1759static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1760 BasicBlock *NewBB,
1761 std::vector<DominatorTree::UpdateType> &DTUpdates,
1762 bool MustUpdateOnce = true) {
1763 assert((!MustUpdateOnce || llvm::count(successors(BI), OldBB) == 1) &&
1764 "BI must jump to OldBB exactly once.");
1765 bool Changed = false;
1766 for (Use &Op : BI->operands())
1767 if (Op == OldBB) {
1768 Op.set(NewBB);
1769 Changed = true;
1770 }
1771
1772 if (Changed) {
1773 DTUpdates.push_back(
1774 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1775 DTUpdates.push_back(
1776 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1777 }
1778 assert(Changed && "Expected a successor to be updated");
1779}
1780
1781// Move Lcssa PHIs to the right place.
1782static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1783 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1784 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1785 Loop *InnerLoop, LoopInfo *LI) {
1786
1787 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1788 // defined either in the header or latch. Those blocks will become header and
1789 // latch of the new outer loop, and the only possible users can PHI nodes
1790 // in the exit block of the loop nest or the outer loop header (reduction
1791 // PHIs, in that case, the incoming value must be defined in the inner loop
1792 // header). We can just substitute the user with the incoming value and remove
1793 // the PHI.
1794 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1795 assert(P.getNumIncomingValues() == 1 &&
1796 "Only loops with a single exit are supported!");
1797
1798 // Incoming values are guaranteed be instructions currently.
1799 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1800 // In case of multi-level nested loops, follow LCSSA to find the incoming
1801 // value defined from the innermost loop.
1802 auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1803 // Skip phis with incoming values from the inner loop body, excluding the
1804 // header and latch.
1805 if (IncIInnerMost->getParent() != InnerLatch &&
1806 IncIInnerMost->getParent() != InnerHeader)
1807 continue;
1808
1809 assert(all_of(P.users(),
1810 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1811 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1812 IncI->getParent() == InnerHeader) ||
1813 cast<PHINode>(U)->getParent() == OuterExit;
1814 }) &&
1815 "Can only replace phis iff the uses are in the loop nest exit or "
1816 "the incoming value is defined in the inner header (it will "
1817 "dominate all loop blocks after interchanging)");
1818 P.replaceAllUsesWith(IncI);
1819 P.eraseFromParent();
1820 }
1821
1822 SmallVector<PHINode *, 8> LcssaInnerExit(
1823 llvm::make_pointer_range(InnerExit->phis()));
1824
1825 SmallVector<PHINode *, 8> LcssaInnerLatch(
1826 llvm::make_pointer_range(InnerLatch->phis()));
1827
1828 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1829 // If a PHI node has users outside of InnerExit, it has a use outside the
1830 // interchanged loop and we have to preserve it. We move these to
1831 // InnerLatch, which will become the new exit block for the innermost
1832 // loop after interchanging.
1833 for (PHINode *P : LcssaInnerExit)
1834 P->moveBefore(InnerLatch->getFirstNonPHIIt());
1835
1836 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1837 // and we have to move them to the new inner latch.
1838 for (PHINode *P : LcssaInnerLatch)
1839 P->moveBefore(InnerExit->getFirstNonPHIIt());
1840
1841 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1842 // incoming values defined in the outer loop, we have to add a new PHI
1843 // in the inner loop latch, which became the exit block of the outer loop,
1844 // after interchanging.
1845 if (OuterExit) {
1846 for (PHINode &P : OuterExit->phis()) {
1847 if (P.getNumIncomingValues() != 1)
1848 continue;
1849 // Skip Phis with incoming values defined in the inner loop. Those should
1850 // already have been updated.
1851 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1852 if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1853 continue;
1854
1855 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1856 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1857 NewPhi->setIncomingBlock(0, OuterLatch);
1858 // We might have incoming edges from other BBs, i.e., the original outer
1859 // header.
1860 for (auto *Pred : predecessors(InnerLatch)) {
1861 if (Pred == OuterLatch)
1862 continue;
1863 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1864 }
1865 NewPhi->insertBefore(InnerLatch->getFirstNonPHIIt());
1866 P.setIncomingValue(0, NewPhi);
1867 }
1868 }
1869
1870 // Now adjust the incoming blocks for the LCSSA PHIs.
1871 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1872 // with the new latch.
1873 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1874}
1875
1876/// This deals with a corner case when a LCSSA phi node appears in a non-exit
1877/// block: the outer loop latch block does not need to be exit block of the
1878/// inner loop. Consider a loop that was in LCSSA form, but then some
1879/// transformation like loop-unswitch comes along and creates an empty block,
1880/// where BB5 in this example is the outer loop latch block:
1881///
1882/// BB4:
1883/// br label %BB5
1884/// BB5:
1885/// %old.cond.lcssa = phi i16 [ %cond, %BB4 ]
1886/// br outer.header
1887///
1888/// Interchange then brings it in LCSSA form again resulting in this chain of
1889/// single-input phi nodes:
1890///
1891/// BB4:
1892/// %new.cond.lcssa = phi i16 [ %cond, %BB3 ]
1893/// br label %BB5
1894/// BB5:
1895/// %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ]
1896///
1897/// The problem is that interchange can reoder blocks BB4 and BB5 placing the
1898/// use before the def if we don't check this. The solution is to simplify
1899/// lcssa phi nodes (remove) if they appear in non-exit blocks.
1900///
1901static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop) {
1902 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
1903 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1904
1905 // Do not modify lcssa phis where they actually belong, i.e. in exit blocks.
1906 if (OuterLoopLatch == InnerLoopExit)
1907 return;
1908
1909 // Collect and remove phis in non-exit blocks if they have 1 input.
1911 llvm::make_pointer_range(OuterLoopLatch->phis()));
1912 for (PHINode *Phi : Phis) {
1913 assert(Phi->getNumIncomingValues() == 1 && "Single input phi expected");
1914 LLVM_DEBUG(dbgs() << "Removing 1-input phi in non-exit block: " << *Phi
1915 << "\n");
1916 Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
1917 Phi->eraseFromParent();
1918 }
1919}
1920
1921bool LoopInterchangeTransform::adjustLoopBranches() {
1922 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1923 std::vector<DominatorTree::UpdateType> DTUpdates;
1924
1925 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1926 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1927
1928 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1929 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1930 InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1931
1932 simplifyLCSSAPhis(OuterLoop, InnerLoop);
1933
1934 // Ensure that both preheaders do not contain PHI nodes and have single
1935 // predecessors. This allows us to move them easily. We use
1936 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1937 // preheaders do not satisfy those conditions.
1938 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1939 !OuterLoopPreHeader->getUniquePredecessor())
1940 OuterLoopPreHeader =
1941 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1942 if (InnerLoopPreHeader == OuterLoop->getHeader())
1943 InnerLoopPreHeader =
1944 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1945
1946 // Adjust the loop preheader
1947 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1948 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1949 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1950 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1951 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1952 BasicBlock *InnerLoopLatchPredecessor =
1953 InnerLoopLatch->getUniquePredecessor();
1954 BasicBlock *InnerLoopLatchSuccessor;
1955 BasicBlock *OuterLoopLatchSuccessor;
1956
1957 BranchInst *OuterLoopLatchBI =
1958 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1959 BranchInst *InnerLoopLatchBI =
1960 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1961 BranchInst *OuterLoopHeaderBI =
1962 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1963 BranchInst *InnerLoopHeaderBI =
1964 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1965
1966 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1967 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1968 !InnerLoopHeaderBI)
1969 return false;
1970
1971 BranchInst *InnerLoopLatchPredecessorBI =
1972 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1973 BranchInst *OuterLoopPredecessorBI =
1974 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1975
1976 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1977 return false;
1978 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1979 if (!InnerLoopHeaderSuccessor)
1980 return false;
1981
1982 // Adjust Loop Preheader and headers.
1983 // The branches in the outer loop predecessor and the outer loop header can
1984 // be unconditional branches or conditional branches with duplicates. Consider
1985 // this when updating the successors.
1986 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1987 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1988 // The outer loop header might or might not branch to the outer latch.
1989 // We are guaranteed to branch to the inner loop preheader.
1990 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1991 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1992 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1993 DTUpdates,
1994 /*MustUpdateOnce=*/false);
1995 }
1996 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1997 InnerLoopHeaderSuccessor, DTUpdates,
1998 /*MustUpdateOnce=*/false);
1999
2000 // Adjust reduction PHI's now that the incoming block has changed.
2001 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
2002 OuterLoopHeader);
2003
2004 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
2005 OuterLoopPreHeader, DTUpdates);
2006
2007 // -------------Adjust loop latches-----------
2008 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
2009 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
2010 else
2011 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
2012
2013 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
2014 InnerLoopLatchSuccessor, DTUpdates);
2015
2016 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
2017 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
2018 else
2019 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
2020
2021 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
2022 OuterLoopLatchSuccessor, DTUpdates);
2023 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
2024 DTUpdates);
2025
2026 DT->applyUpdates(DTUpdates);
2027 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
2028 OuterLoopPreHeader);
2029
2030 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
2031 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
2032 InnerLoop, LI);
2033 // For PHIs in the exit block of the outer loop, outer's latch has been
2034 // replaced by Inners'.
2035 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
2036
2037 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
2038 // Now update the reduction PHIs in the inner and outer loop headers.
2039 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
2040 for (PHINode &PHI : InnerLoopHeader->phis())
2041 if (OuterInnerReductions.contains(&PHI))
2042 InnerLoopPHIs.push_back(&PHI);
2043
2044 for (PHINode &PHI : OuterLoopHeader->phis())
2045 if (OuterInnerReductions.contains(&PHI))
2046 OuterLoopPHIs.push_back(&PHI);
2047
2048 // Now move the remaining reduction PHIs from outer to inner loop header and
2049 // vice versa. The PHI nodes must be part of a reduction across the inner and
2050 // outer loop and all the remains to do is and updating the incoming blocks.
2051 for (PHINode *PHI : OuterLoopPHIs) {
2052 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
2053 PHI->moveBefore(InnerLoopHeader->getFirstNonPHIIt());
2054 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
2055 }
2056 for (PHINode *PHI : InnerLoopPHIs) {
2057 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
2058 PHI->moveBefore(OuterLoopHeader->getFirstNonPHIIt());
2059 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
2060 }
2061
2062 // Update the incoming blocks for moved PHI nodes.
2063 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
2064 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
2065 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
2066 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
2067
2068 // Values defined in the outer loop header could be used in the inner loop
2069 // latch. In that case, we need to create LCSSA phis for them, because after
2070 // interchanging they will be defined in the new inner loop and used in the
2071 // new outer loop.
2072 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
2073 for (Instruction &I :
2074 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
2075 MayNeedLCSSAPhis.push_back(&I);
2076 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE);
2077
2078 return true;
2079}
2080
2081bool LoopInterchangeTransform::adjustLoopLinks() {
2082 // Adjust all branches in the inner and outer loop.
2083 bool Changed = adjustLoopBranches();
2084 if (Changed) {
2085 // We have interchanged the preheaders so we need to interchange the data in
2086 // the preheaders as well. This is because the content of the inner
2087 // preheader was previously executed inside the outer loop.
2088 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2089 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
2090 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
2091 }
2092 return Changed;
2093}
2094
2098 LPMUpdater &U) {
2099 Function &F = *LN.getParent();
2100 SmallVector<Loop *, 8> LoopList(LN.getLoops());
2101
2102 if (MaxMemInstrCount < 1) {
2103 LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");
2104 return PreservedAnalyses::all();
2105 }
2107
2108 // Ensure minimum depth of the loop nest to do the interchange.
2109 if (!hasSupportedLoopDepth(LoopList, ORE))
2110 return PreservedAnalyses::all();
2111 // Ensure computable loop nest.
2112 if (!isComputableLoopNest(&AR.SE, LoopList)) {
2113 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
2114 return PreservedAnalyses::all();
2115 }
2116
2117 ORE.emit([&]() {
2118 return OptimizationRemarkAnalysis(DEBUG_TYPE, "Dependence",
2121 << "Computed dependence info, invoking the transform.";
2122 });
2123
2124 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
2125 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE).run(LN))
2126 return PreservedAnalyses::all();
2127 U.markLoopNestChanged(true);
2129}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
Rewrite undef for PHI
ReachingDefInfo InstSet InstSet & Ignore
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define DEBUG_TYPE
Hexagon Common GEP
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the interface for the loop cache analysis.
SmallVector< Loop *, 4 > LoopVector
Definition LoopFuse.cpp:450
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
static cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time"))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts)
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop)
This deals with a corner case when a LCSSA phi node appears in a non-exit block: the outer loop latch...
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static void printDepMatrix(CharMatrix &DepMatrix)
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)
static bool inThisOrder(const Instruction *Src, const Instruction *Dst)
Return true if Src appears before Dst in the same basic block.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)
Return true if we can vectorize the loop specified by LoopId.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
#define DEBUG_TYPE
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)
static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::ZeroOrMore, cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))
static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
loop Loop Strength Reduction
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
uint64_t IntrinsicInst * II
#define P(N)
This file contains some templates that are useful if you are working with the STL at all.
SmallVector< Value *, 8 > ValueVector
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:191
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
iterator begin() const
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:632
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:61
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
RecurKind getRecurrenceKind() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition StringMap.h:133
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:292
LLVM Value Representation.
Definition Value.h:75
iterator_range< user_iterator > users()
Definition Value.h:426
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
list_initializer< Ty > list_init(ArrayRef< Ty > Vals)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
InstructionCost Cost
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition LCSSA.cpp:449
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1968
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ FMul
Product of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Add
Sum of integers.
@ FAdd
Sum of floats.
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1954
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition LCSSA.cpp:308
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...