Bug Summary

File:build/source/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Warning:line 659, column 8
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LoopInterchange.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/Scalar -I /build/source/llvm/lib/Transforms/Scalar -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm=build-llvm -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm=build-llvm -fcoverage-prefix-map=/build/source/= -source-date-epoch 1668078801 -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-11-10-135928-647445-1 -x c++ /build/source/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Scalar/LoopInterchange.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Analysis/DependenceAnalysis.h"
21#include "llvm/Analysis/LoopCacheAnalysis.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/LoopNestAnalysis.h"
24#include "llvm/Analysis/LoopPass.h"
25#include "llvm/Analysis/OptimizationRemarkEmitter.h"
26#include "llvm/Analysis/ScalarEvolution.h"
27#include "llvm/Analysis/ScalarEvolutionExpressions.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/DiagnosticInfo.h"
31#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/IRBuilder.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/User.h"
38#include "llvm/IR/Value.h"
39#include "llvm/InitializePasses.h"
40#include "llvm/Pass.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/raw_ostream.h"
46#include "llvm/Transforms/Scalar.h"
47#include "llvm/Transforms/Scalar/LoopPassManager.h"
48#include "llvm/Transforms/Utils/BasicBlockUtils.h"
49#include "llvm/Transforms/Utils/LoopUtils.h"
50#include <cassert>
51#include <utility>
52#include <vector>
53
54using namespace llvm;
55
56#define DEBUG_TYPE"loop-interchange" "loop-interchange"
57
58STATISTIC(LoopsInterchanged, "Number of loops interchanged")static llvm::Statistic LoopsInterchanged = {"loop-interchange"
, "LoopsInterchanged", "Number of loops interchanged"}
;
59
60static cl::opt<int> LoopInterchangeCostThreshold(
61 "loop-interchange-threshold", cl::init(0), cl::Hidden,
62 cl::desc("Interchange if you gain more than this number"));
63
64namespace {
65
66using LoopVector = SmallVector<Loop *, 8>;
67
68// TODO: Check if we can use a sparse matrix here.
69using CharMatrix = std::vector<std::vector<char>>;
70
71} // end anonymous namespace
72
73// Maximum number of dependencies that can be handled in the dependency matrix.
74static const unsigned MaxMemInstrCount = 100;
75
76// Maximum loop depth supported.
77static const unsigned MaxLoopNestDepth = 10;
78
79#ifdef DUMP_DEP_MATRICIES
80static void printDepMatrix(CharMatrix &DepMatrix) {
81 for (auto &Row : DepMatrix) {
82 for (auto D : Row)
83 LLVM_DEBUG(dbgs() << D << " ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << D << " "; } } while
(false)
;
84 LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "\n"; } } while (false
)
;
85 }
86}
87#endif
88
89static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
90 Loop *L, DependenceInfo *DI,
91 ScalarEvolution *SE) {
92 using ValueVector = SmallVector<Value *, 16>;
93
94 ValueVector MemInstr;
95
96 // For each block.
97 for (BasicBlock *BB : L->blocks()) {
98 // Scan the BB and collect legal loads and stores.
99 for (Instruction &I : *BB) {
100 if (!isa<Instruction>(I))
101 return false;
102 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
103 if (!Ld->isSimple())
104 return false;
105 MemInstr.push_back(&I);
106 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
107 if (!St->isSimple())
108 return false;
109 MemInstr.push_back(&I);
110 }
111 }
112 }
113
114 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
115 << " Loads and Stores to analyze\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
;
116
117 ValueVector::iterator I, IE, J, JE;
118
119 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
120 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
121 std::vector<char> Dep;
122 Instruction *Src = cast<Instruction>(*I);
123 Instruction *Dst = cast<Instruction>(*J);
124 // Ignore Input dependencies.
125 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
126 continue;
127 // Track Output, Flow, and Anti dependencies.
128 if (auto D = DI->depends(Src, Dst, true)) {
129 assert(D->isOrdered() && "Expected an output, flow or anti dep.")(static_cast <bool> (D->isOrdered() && "Expected an output, flow or anti dep."
) ? void (0) : __assert_fail ("D->isOrdered() && \"Expected an output, flow or anti dep.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 129, __extension__
__PRETTY_FUNCTION__))
;
130 // If the direction vector is negative, normalize it to
131 // make it non-negative.
132 if (D->normalize(SE))
133 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Negative dependence vector normalized.\n"
; } } while (false)
;
134 LLVM_DEBUG(StringRef DepType =do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
135 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
136 dbgs() << "Found " << DepTypedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
137 << " dependency between Src and Dst\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
138 << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
;
139 unsigned Levels = D->getLevels();
140 char Direction;
141 for (unsigned II = 1; II <= Levels; ++II) {
142 if (D->isScalar(II)) {
143 Direction = 'S';
144 Dep.push_back(Direction);
145 } else {
146 unsigned Dir = D->getDirection(II);
147 if (Dir == Dependence::DVEntry::LT ||
148 Dir == Dependence::DVEntry::LE)
149 Direction = '<';
150 else if (Dir == Dependence::DVEntry::GT ||
151 Dir == Dependence::DVEntry::GE)
152 Direction = '>';
153 else if (Dir == Dependence::DVEntry::EQ)
154 Direction = '=';
155 else
156 Direction = '*';
157 Dep.push_back(Direction);
158 }
159 }
160 while (Dep.size() != Level) {
161 Dep.push_back('I');
162 }
163
164 DepMatrix.push_back(Dep);
165 if (DepMatrix.size() > MaxMemInstrCount) {
166 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
167 << " dependencies inside loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
;
168 return false;
169 }
170 }
171 }
172 }
173
174 return true;
175}
176
177// A loop is moved from index 'from' to an index 'to'. Update the Dependence
178// matrix by exchanging the two columns.
179static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
180 unsigned ToIndx) {
181 for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
182 std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
183}
184
185// Checks if no dependence exist in the dependency matrix in Row before Column.
186static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
187 unsigned Column) {
188 for (unsigned i = 0; i < Column; ++i) {
189 if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
190 DepMatrix[Row][i] != 'I')
191 return false;
192 }
193 return true;
194}
195
196static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
197 unsigned OuterLoopId, char InnerDep,
198 char OuterDep) {
199 if (InnerDep == OuterDep)
200 return true;
201
202 // It is legal to interchange if and only if after interchange no row has a
203 // '>' direction as the leftmost non-'='.
204
205 if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
206 return true;
207
208 if (InnerDep == '<')
209 return true;
210
211 if (InnerDep == '>') {
212 // If OuterLoopId represents outermost loop then interchanging will make the
213 // 1st dependency as '>'
214 if (OuterLoopId == 0)
215 return false;
216
217 // If all dependencies before OuterloopId are '=','S'or 'I'. Then
218 // interchanging will result in this row having an outermost non '='
219 // dependency of '>'
220 if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
221 return true;
222 }
223
224 return false;
225}
226
227// Checks if it is legal to interchange 2 loops.
228// [Theorem] A permutation of the loops in a perfect nest is legal if and only
229// if the direction matrix, after the same permutation is applied to its
230// columns, has no ">" direction as the leftmost non-"=" direction in any row.
231static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
232 unsigned InnerLoopId,
233 unsigned OuterLoopId) {
234 unsigned NumRows = DepMatrix.size();
235 // For each row check if it is valid to interchange.
236 for (unsigned Row = 0; Row < NumRows; ++Row) {
237 char InnerDep = DepMatrix[Row][InnerLoopId];
238 char OuterDep = DepMatrix[Row][OuterLoopId];
239 if (InnerDep == '*' || OuterDep == '*')
240 return false;
241 if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
242 return false;
243 }
244 return true;
245}
246
247static void populateWorklist(Loop &L, LoopVector &LoopList) {
248 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
249 << L.getHeader()->getParent()->getName() << " Loop: %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
250 << L.getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
;
251 assert(LoopList.empty() && "LoopList should initially be empty!")(static_cast <bool> (LoopList.empty() && "LoopList should initially be empty!"
) ? void (0) : __assert_fail ("LoopList.empty() && \"LoopList should initially be empty!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 251, __extension__
__PRETTY_FUNCTION__))
;
252 Loop *CurrentLoop = &L;
253 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
254 while (!Vec->empty()) {
255 // The current loop has multiple subloops in it hence it is not tightly
256 // nested.
257 // Discard all loops above it added into Worklist.
258 if (Vec->size() != 1) {
259 LoopList = {};
260 return;
261 }
262
263 LoopList.push_back(CurrentLoop);
264 CurrentLoop = Vec->front();
265 Vec = &CurrentLoop->getSubLoops();
266 }
267 LoopList.push_back(CurrentLoop);
268}
269
270namespace {
271
272/// LoopInterchangeLegality checks if it is legal to interchange the loop.
273class LoopInterchangeLegality {
274public:
275 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
276 OptimizationRemarkEmitter *ORE)
277 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
278
279 /// Check if the loops can be interchanged.
280 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
281 CharMatrix &DepMatrix);
282
283 /// Discover induction PHIs in the header of \p L. Induction
284 /// PHIs are added to \p Inductions.
285 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
286
287 /// Check if the loop structure is understood. We do not handle triangular
288 /// loops for now.
289 bool isLoopStructureUnderstood();
290
291 bool currentLimitations();
292
293 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
294 return OuterInnerReductions;
295 }
296
297 const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
298 return InnerLoopInductions;
299 }
300
301private:
302 bool tightlyNested(Loop *Outer, Loop *Inner);
303 bool containsUnsafeInstructions(BasicBlock *BB);
304
305 /// Discover induction and reduction PHIs in the header of \p L. Induction
306 /// PHIs are added to \p Inductions, reductions are added to
307 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
308 /// to be passed as \p InnerLoop.
309 bool findInductionAndReductions(Loop *L,
310 SmallVector<PHINode *, 8> &Inductions,
311 Loop *InnerLoop);
312
313 Loop *OuterLoop;
314 Loop *InnerLoop;
315
316 ScalarEvolution *SE;
317
318 /// Interface to emit optimization remarks.
319 OptimizationRemarkEmitter *ORE;
320
321 /// Set of reduction PHIs taking part of a reduction across the inner and
322 /// outer loop.
323 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
324
325 /// Set of inner loop induction PHIs
326 SmallVector<PHINode *, 8> InnerLoopInductions;
327};
328
329/// LoopInterchangeProfitability checks if it is profitable to interchange the
330/// loop.
331class LoopInterchangeProfitability {
332public:
333 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
334 OptimizationRemarkEmitter *ORE)
335 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
336
337 /// Check if the loop interchange is profitable.
338 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
339 unsigned InnerLoopId, unsigned OuterLoopId,
340 CharMatrix &DepMatrix,
341 const DenseMap<const Loop *, unsigned> &CostMap);
342
343private:
344 int getInstrOrderCost();
345
346 Loop *OuterLoop;
347 Loop *InnerLoop;
348
349 /// Scev analysis.
350 ScalarEvolution *SE;
351
352 /// Interface to emit optimization remarks.
353 OptimizationRemarkEmitter *ORE;
354};
355
356/// LoopInterchangeTransform interchanges the loop.
357class LoopInterchangeTransform {
358public:
359 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
360 LoopInfo *LI, DominatorTree *DT,
361 const LoopInterchangeLegality &LIL)
362 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
363
364 /// Interchange OuterLoop and InnerLoop.
365 bool transform();
366 void restructureLoops(Loop *NewInner, Loop *NewOuter,
367 BasicBlock *OrigInnerPreHeader,
368 BasicBlock *OrigOuterPreHeader);
369 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
370
371private:
372 bool adjustLoopLinks();
373 bool adjustLoopBranches();
374
375 Loop *OuterLoop;
376 Loop *InnerLoop;
377
378 /// Scev analysis.
379 ScalarEvolution *SE;
380
381 LoopInfo *LI;
382 DominatorTree *DT;
383
384 const LoopInterchangeLegality &LIL;
385};
386
387struct LoopInterchange {
388 ScalarEvolution *SE = nullptr;
389 LoopInfo *LI = nullptr;
390 DependenceInfo *DI = nullptr;
391 DominatorTree *DT = nullptr;
392 std::unique_ptr<CacheCost> CC = nullptr;
393
394 /// Interface to emit optimization remarks.
395 OptimizationRemarkEmitter *ORE;
396
397 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
398 DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
399 OptimizationRemarkEmitter *ORE)
400 : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
401
402 bool run(Loop *L) {
403 if (L->getParentLoop())
404 return false;
405 SmallVector<Loop *, 8> LoopList;
406 populateWorklist(*L, LoopList);
407 return processLoopList(LoopList);
408 }
409
410 bool run(LoopNest &LN) {
411 SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
412 for (unsigned I = 1; I < LoopList.size(); ++I)
413 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
414 return false;
415 return processLoopList(LoopList);
416 }
417
418 bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
419 for (Loop *L : LoopList) {
420 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
421 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
422 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Couldn't compute backedge count\n"
; } } while (false)
;
423 return false;
424 }
425 if (L->getNumBackEdges() != 1) {
426 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "NumBackEdges is not equal to 1\n"
; } } while (false)
;
427 return false;
428 }
429 if (!L->getExitingBlock()) {
430 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't have unique exit block\n"
; } } while (false)
;
431 return false;
432 }
433 }
434 return true;
435 }
436
437 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
438 // TODO: Add a better heuristic to select the loop to be interchanged based
439 // on the dependence matrix. Currently we select the innermost loop.
440 return LoopList.size() - 1;
441 }
442
443 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
444 bool Changed = false;
445 unsigned LoopNestDepth = LoopList.size();
446 if (LoopNestDepth < 2) {
447 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't contain minimum nesting level.\n"
; } } while (false)
;
448 return false;
449 }
450 if (LoopNestDepth > MaxLoopNestDepth) {
451 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
452 << MaxLoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
;
453 return false;
454 }
455 if (!isComputableLoopNest(LoopList)) {
456 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not valid loop candidate for interchange\n"
; } } while (false)
;
457 return false;
458 }
459
460 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepthdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
461 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
;
462
463 CharMatrix DependencyMatrix;
464 Loop *OuterMostLoop = *(LoopList.begin());
465 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
466 OuterMostLoop, DI, SE)) {
467 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Populating dependency matrix failed\n"
; } } while (false)
;
468 return false;
469 }
470#ifdef DUMP_DEP_MATRICIES
471 LLVM_DEBUG(dbgs() << "Dependence before interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence before interchange\n"
; } } while (false)
;
472 printDepMatrix(DependencyMatrix);
473#endif
474
475 // Get the Outermost loop exit.
476 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
477 if (!LoopNestExit) {
478 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "OuterMostLoop needs an unique exit block"
; } } while (false)
;
479 return false;
480 }
481
482 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
483 // Obtain the loop vector returned from loop cache analysis beforehand,
484 // and put each <Loop, index> pair into a map for constant time query
485 // later. Indices in loop vector reprsent the optimal order of the
486 // corresponding loop, e.g., given a loopnest with depth N, index 0
487 // indicates the loop should be placed as the outermost loop and index N
488 // indicates the loop should be placed as the innermost loop.
489 //
490 // For the old pass manager CacheCost would be null.
491 DenseMap<const Loop *, unsigned> CostMap;
492 if (CC != nullptr) {
493 const auto &LoopCosts = CC->getLoopCosts();
494 for (unsigned i = 0; i < LoopCosts.size(); i++) {
495 CostMap[LoopCosts[i].first] = i;
496 }
497 }
498 // We try to achieve the globally optimal memory access for the loopnest,
499 // and do interchange based on a bubble-sort fasion. We start from
500 // the innermost loop, move it outwards to the best possible position
501 // and repeat this process.
502 for (unsigned j = SelecLoopId; j > 0; j--) {
503 bool ChangedPerIter = false;
504 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
505 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
506 DependencyMatrix, CostMap);
507 if (!Interchanged)
508 continue;
509 // Loops interchanged, update LoopList accordingly.
510 std::swap(LoopList[i - 1], LoopList[i]);
511 // Update the DependencyMatrix
512 interChangeDependencies(DependencyMatrix, i, i - 1);
513#ifdef DUMP_DEP_MATRICIES
514 LLVM_DEBUG(dbgs() << "Dependence after interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence after interchange\n"
; } } while (false)
;
515 printDepMatrix(DependencyMatrix);
516#endif
517 ChangedPerIter |= Interchanged;
518 Changed |= Interchanged;
519 }
520 // Early abort if there was no interchange during an entire round of
521 // moving loops outwards.
522 if (!ChangedPerIter)
523 break;
524 }
525 return Changed;
526 }
527
528 bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
529 unsigned OuterLoopId,
530 std::vector<std::vector<char>> &DependencyMatrix,
531 const DenseMap<const Loop *, unsigned> &CostMap) {
532 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
533 << " and OuterLoopId = " << OuterLoopId << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
;
534 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
535 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
536 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not interchanging loops. Cannot prove legality.\n"
; } } while (false)
;
537 return false;
538 }
539 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are legal to interchange\n"
; } } while (false)
;
540 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
541 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
542 DependencyMatrix, CostMap)) {
543 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Interchanging loops not profitable.\n"
; } } while (false)
;
544 return false;
545 }
546
547 ORE->emit([&]() {
548 return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged",
549 InnerLoop->getStartLoc(),
550 InnerLoop->getHeader())
551 << "Loop interchanged with enclosing loop.";
552 });
553
554 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
555 LIT.transform();
556 LLVM_DEBUG(dbgs() << "Loops interchanged.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops interchanged.\n"
; } } while (false)
;
557 LoopsInterchanged++;
558
559 llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE);
560 return true;
561 }
562};
563
564} // end anonymous namespace
565
566bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
567 return any_of(*BB, [](const Instruction &I) {
568 return I.mayHaveSideEffects() || I.mayReadFromMemory();
569 });
570}
571
572bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
573 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
574 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
575 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
576
577 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking if loops are tightly nested\n"
; } } while (false)
;
578
579 // A perfectly nested loop will not have any branch in between the outer and
580 // inner block i.e. outer header will branch to either inner preheader and
581 // outerloop latch.
582 BranchInst *OuterLoopHeaderBI =
583 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
584 if (!OuterLoopHeaderBI)
585 return false;
586
587 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
588 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
589 Succ != OuterLoopLatch)
590 return false;
591
592 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking instructions in Loop header and Loop latch\n"
; } } while (false)
;
593 // We do not have any basic block in between now make sure the outer header
594 // and outer loop latch doesn't contain any unsafe instructions.
595 if (containsUnsafeInstructions(OuterLoopHeader) ||
596 containsUnsafeInstructions(OuterLoopLatch))
597 return false;
598
599 // Also make sure the inner loop preheader does not contain any unsafe
600 // instructions. Note that all instructions in the preheader will be moved to
601 // the outer loop header when interchanging.
602 if (InnerLoopPreHeader != OuterLoopHeader &&
603 containsUnsafeInstructions(InnerLoopPreHeader))
604 return false;
605
606 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
607 // Ensure the inner loop exit block flows to the outer loop latch possibly
608 // through empty blocks.
609 const BasicBlock &SuccInner =
610 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
611 if (&SuccInner != OuterLoopLatch) {
612 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExitdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
613 << " does not lead to the outer loop latch.\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
;
614 return false;
615 }
616 // The inner loop exit block does flow to the outer loop latch and not some
617 // other BBs, now make sure it contains safe instructions, since it will be
618 // moved into the (new) inner loop after interchange.
619 if (containsUnsafeInstructions(InnerLoopExit))
620 return false;
621
622 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are perfectly nested\n"
; } } while (false)
;
623 // We have a perfect loop nest.
624 return true;
625}
626
627bool LoopInterchangeLegality::isLoopStructureUnderstood() {
628 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
629 for (PHINode *InnerInduction : InnerLoopInductions) {
10
Assuming '__begin1' is equal to '__end1'
630 unsigned Num = InnerInduction->getNumOperands();
631 for (unsigned i = 0; i < Num; ++i) {
632 Value *Val = InnerInduction->getOperand(i);
633 if (isa<Constant>(Val))
634 continue;
635 Instruction *I = dyn_cast<Instruction>(Val);
636 if (!I)
637 return false;
638 // TODO: Handle triangular loops.
639 // e.g. for(int i=0;i<N;i++)
640 // for(int j=i;j<N;j++)
641 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
642 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
643 InnerLoopPreheader &&
644 !OuterLoop->isLoopInvariant(I)) {
645 return false;
646 }
647 }
648 }
649
650 // TODO: Handle triangular loops of another form.
651 // e.g. for(int i=0;i<N;i++)
652 // for(int j=0;j<i;j++)
653 // or,
654 // for(int i=0;i<N;i++)
655 // for(int j=0;j*i<N;j++)
656 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
657 BranchInst *InnerLoopLatchBI =
12
'InnerLoopLatchBI' initialized to a null pointer value
658 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
11
Assuming the object is not a 'CastReturnType'
659 if (!InnerLoopLatchBI->isConditional())
13
Called C++ object pointer is null
660 return false;
661 if (CmpInst *InnerLoopCmp =
662 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
663 Value *Op0 = InnerLoopCmp->getOperand(0);
664 Value *Op1 = InnerLoopCmp->getOperand(1);
665
666 // LHS and RHS of the inner loop exit condition, e.g.,
667 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
668 Value *Left = nullptr;
669 Value *Right = nullptr;
670
671 // Check if V only involves inner loop induction variable.
672 // Return true if V is InnerInduction, or a cast from
673 // InnerInduction, or a binary operator that involves
674 // InnerInduction and a constant.
675 std::function<bool(Value *)> IsPathToInnerIndVar;
676 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
677 if (llvm::is_contained(InnerLoopInductions, V))
678 return true;
679 if (isa<Constant>(V))
680 return true;
681 const Instruction *I = dyn_cast<Instruction>(V);
682 if (!I)
683 return false;
684 if (isa<CastInst>(I))
685 return IsPathToInnerIndVar(I->getOperand(0));
686 if (isa<BinaryOperator>(I))
687 return IsPathToInnerIndVar(I->getOperand(0)) &&
688 IsPathToInnerIndVar(I->getOperand(1));
689 return false;
690 };
691
692 // In case of multiple inner loop indvars, it is okay if LHS and RHS
693 // are both inner indvar related variables.
694 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
695 return true;
696
697 // Otherwise we check if the cmp instruction compares an inner indvar
698 // related variable (Left) with a outer loop invariant (Right).
699 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
700 Left = Op0;
701 Right = Op1;
702 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
703 Left = Op1;
704 Right = Op0;
705 }
706
707 if (Left == nullptr)
708 return false;
709
710 const SCEV *S = SE->getSCEV(Right);
711 if (!SE->isLoopInvariant(S, OuterLoop))
712 return false;
713 }
714
715 return true;
716}
717
718// If SV is a LCSSA PHI node with a single incoming value, return the incoming
719// value.
720static Value *followLCSSA(Value *SV) {
721 PHINode *PHI = dyn_cast<PHINode>(SV);
722 if (!PHI)
723 return SV;
724
725 if (PHI->getNumIncomingValues() != 1)
726 return SV;
727 return followLCSSA(PHI->getIncomingValue(0));
728}
729
730// Check V's users to see if it is involved in a reduction in L.
731static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
732 // Reduction variables cannot be constants.
733 if (isa<Constant>(V))
734 return nullptr;
735
736 for (Value *User : V->users()) {
737 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
738 if (PHI->getNumIncomingValues() == 1)
739 continue;
740 RecurrenceDescriptor RD;
741 if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
742 // Detect floating point reduction only when it can be reordered.
743 if (RD.getExactFPMathInst() != nullptr)
744 return nullptr;
745 return PHI;
746 }
747 return nullptr;
748 }
749 }
750
751 return nullptr;
752}
753
754bool LoopInterchangeLegality::findInductionAndReductions(
755 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
756 if (!L->getLoopLatch() || !L->getLoopPredecessor())
757 return false;
758 for (PHINode &PHI : L->getHeader()->phis()) {
759 RecurrenceDescriptor RD;
760 InductionDescriptor ID;
761 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
762 Inductions.push_back(&PHI);
763 else {
764 // PHIs in inner loops need to be part of a reduction in the outer loop,
765 // discovered when checking the PHIs of the outer loop earlier.
766 if (!InnerLoop) {
767 if (!OuterInnerReductions.count(&PHI)) {
768 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
769 "across the outer loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
;
770 return false;
771 }
772 } else {
773 assert(PHI.getNumIncomingValues() == 2 &&(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 774, __extension__
__PRETTY_FUNCTION__))
774 "Phis in loop header should have exactly 2 incoming values")(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 774, __extension__
__PRETTY_FUNCTION__))
;
775 // Check if we have a PHI node in the outer loop that has a reduction
776 // result from the inner loop as an incoming value.
777 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
778 PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
779 if (!InnerRedPhi ||
780 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
781 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
782 dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
783 << "Failed to recognize PHI as an induction or reduction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
;
784 return false;
785 }
786 OuterInnerReductions.insert(&PHI);
787 OuterInnerReductions.insert(InnerRedPhi);
788 }
789 }
790 }
791 return true;
792}
793
794// This function indicates the current limitations in the transform as a result
795// of which we do not proceed.
796bool LoopInterchangeLegality::currentLimitations() {
797 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
798
799 // transform currently expects the loop latches to also be the exiting
800 // blocks.
801 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
1
Assuming the condition is false
5
Taking false branch
802 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
2
Assuming the condition is false
803 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
3
Assuming the object is a 'class llvm::BranchInst &'
804 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
4
Assuming the object is a 'class llvm::BranchInst &'
805 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
806 dbgs() << "Loops where the latch is not the exiting block are not"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
807 << " supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
;
808 ORE->emit([&]() {
809 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ExitingNotLatch",
810 OuterLoop->getStartLoc(),
811 OuterLoop->getHeader())
812 << "Loops where the latch is not the exiting block cannot be"
813 " interchange currently.";
814 });
815 return true;
816 }
817
818 SmallVector<PHINode *, 8> Inductions;
819 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
6
Taking false branch
820 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
821 dbgs() << "Only outer loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
822 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
823 ORE->emit([&]() {
824 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter",
825 OuterLoop->getStartLoc(),
826 OuterLoop->getHeader())
827 << "Only outer loops with induction or reduction PHI nodes can be"
828 " interchanged currently.";
829 });
830 return true;
831 }
832
833 Inductions.clear();
834 // For multi-level loop nests, make sure that all phi nodes for inner loops
835 // at all levels can be recognized as a induction or reduction phi. Bail out
836 // if a phi node at a certain nesting level cannot be properly recognized.
837 Loop *CurLevelLoop = OuterLoop;
838 while (!CurLevelLoop->getSubLoops().empty()) {
7
Assuming the condition is false
8
Loop condition is false. Execution continues on line 857
839 // We already made sure that the loop nest is tightly nested.
840 CurLevelLoop = CurLevelLoop->getSubLoops().front();
841 if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
842 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
843 dbgs() << "Only inner loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
844 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
845 ORE->emit([&]() {
846 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner",
847 CurLevelLoop->getStartLoc(),
848 CurLevelLoop->getHeader())
849 << "Only inner loops with induction or reduction PHI nodes can be"
850 " interchange currently.";
851 });
852 return true;
853 }
854 }
855
856 // TODO: Triangular loops are not handled for now.
857 if (!isLoopStructureUnderstood()) {
9
Calling 'LoopInterchangeLegality::isLoopStructureUnderstood'
858 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop structure not understood by pass\n"
; } } while (false)
;
859 ORE->emit([&]() {
860 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner",
861 InnerLoop->getStartLoc(),
862 InnerLoop->getHeader())
863 << "Inner loop structure not understood currently.";
864 });
865 return true;
866 }
867
868 return false;
869}
870
871bool LoopInterchangeLegality::findInductions(
872 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
873 for (PHINode &PHI : L->getHeader()->phis()) {
874 InductionDescriptor ID;
875 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
876 Inductions.push_back(&PHI);
877 }
878 return !Inductions.empty();
879}
880
881// We currently only support LCSSA PHI nodes in the inner loop exit, if their
882// users are either reduction PHIs or PHIs outside the outer loop (which means
883// the we are only interested in the final value after the loop).
884static bool
885areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
886 SmallPtrSetImpl<PHINode *> &Reductions) {
887 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
888 for (PHINode &PHI : InnerExit->phis()) {
889 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
890 if (PHI.getNumIncomingValues() > 1)
891 return false;
892 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
893 PHINode *PN = dyn_cast<PHINode>(U);
894 return !PN ||
895 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
896 })) {
897 return false;
898 }
899 }
900 return true;
901}
902
903// We currently support LCSSA PHI nodes in the outer loop exit, if their
904// incoming values do not come from the outer loop latch or if the
905// outer loop latch has a single predecessor. In that case, the value will
906// be available if both the inner and outer loop conditions are true, which
907// will still be true after interchanging. If we have multiple predecessor,
908// that may not be the case, e.g. because the outer loop latch may be executed
909// if the inner loop is not executed.
910static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
911 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
912 for (PHINode &PHI : LoopNestExit->phis()) {
913 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
914 Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
915 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
916 continue;
917
918 // The incoming value is defined in the outer loop latch. Currently we
919 // only support that in case the outer loop latch has a single predecessor.
920 // This guarantees that the outer loop latch is executed if and only if
921 // the inner loop is executed (because tightlyNested() guarantees that the
922 // outer loop header only branches to the inner loop or the outer loop
923 // latch).
924 // FIXME: We could weaken this logic and allow multiple predecessors,
925 // if the values are produced outside the loop latch. We would need
926 // additional logic to update the PHI nodes in the exit block as
927 // well.
928 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
929 return false;
930 }
931 }
932 return true;
933}
934
935// In case of multi-level nested loops, it may occur that lcssa phis exist in
936// the latch of InnerLoop, i.e., when defs of the incoming values are further
937// inside the loopnest. Sometimes those incoming values are not available
938// after interchange, since the original inner latch will become the new outer
939// latch which may have predecessor paths that do not include those incoming
940// values.
941// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
942// multi-level loop nests.
943static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
944 if (InnerLoop->getSubLoops().empty())
945 return true;
946 // If the original outer latch has only one predecessor, then values defined
947 // further inside the looploop, e.g., in the innermost loop, will be available
948 // at the new outer latch after interchange.
949 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
950 return true;
951
952 // The outer latch has more than one predecessors, i.e., the inner
953 // exit and the inner header.
954 // PHI nodes in the inner latch are lcssa phis where the incoming values
955 // are defined further inside the loopnest. Check if those phis are used
956 // in the original inner latch. If that is the case then bail out since
957 // those incoming values may not be available at the new outer latch.
958 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
959 for (PHINode &PHI : InnerLoopLatch->phis()) {
960 for (auto *U : PHI.users()) {
961 Instruction *UI = cast<Instruction>(U);
962 if (InnerLoopLatch == UI->getParent())
963 return false;
964 }
965 }
966 return true;
967}
968
969bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
970 unsigned OuterLoopId,
971 CharMatrix &DepMatrix) {
972 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
973 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
974 << " and OuterLoopId = " << OuterLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
975 << " due to dependence\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
;
976 ORE->emit([&]() {
977 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence",
978 InnerLoop->getStartLoc(),
979 InnerLoop->getHeader())
980 << "Cannot interchange loops due to dependences.";
981 });
982 return false;
983 }
984 // Check if outer and inner loop contain legal instructions only.
985 for (auto *BB : OuterLoop->blocks())
986 for (Instruction &I : BB->instructionsWithoutDebug())
987 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
988 // readnone functions do not prevent interchanging.
989 if (CI->onlyWritesMemory())
990 continue;
991 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
992 dbgs() << "Loops with call instructions cannot be interchanged "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
993 << "safely.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
;
994 ORE->emit([&]() {
995 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "CallInst",
996 CI->getDebugLoc(),
997 CI->getParent())
998 << "Cannot interchange loops due to call instruction.";
999 });
1000
1001 return false;
1002 }
1003
1004 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1005 LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cound not find inner loop induction variables.\n"
; } } while (false)
;
1006 return false;
1007 }
1008
1009 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
1010 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"
; } } while (false)
;
1011 ORE->emit([&]() {
1012 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedInnerLatchPHI",
1013 InnerLoop->getStartLoc(),
1014 InnerLoop->getHeader())
1015 << "Cannot interchange loops because unsupported PHI nodes found "
1016 "in inner loop latch.";
1017 });
1018 return false;
1019 }
1020
1021 // TODO: The loops could not be interchanged due to current limitations in the
1022 // transform module.
1023 if (currentLimitations()) {
1024 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not legal because of current transform limitation\n"
; } } while (false)
;
1025 return false;
1026 }
1027
1028 // Check if the loops are tightly nested.
1029 if (!tightlyNested(OuterLoop, InnerLoop)) {
1030 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops not tightly nested\n"
; } } while (false)
;
1031 ORE->emit([&]() {
1032 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested",
1033 InnerLoop->getStartLoc(),
1034 InnerLoop->getHeader())
1035 << "Cannot interchange loops because they are not tightly "
1036 "nested.";
1037 });
1038 return false;
1039 }
1040
1041 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1042 OuterInnerReductions)) {
1043 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"
; } } while (false)
;
1044 ORE->emit([&]() {
1045 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1046 InnerLoop->getStartLoc(),
1047 InnerLoop->getHeader())
1048 << "Found unsupported PHI node in loop exit.";
1049 });
1050 return false;
1051 }
1052
1053 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1054 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"
; } } while (false)
;
1055 ORE->emit([&]() {
1056 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1057 OuterLoop->getStartLoc(),
1058 OuterLoop->getHeader())
1059 << "Found unsupported PHI node in loop exit.";
1060 });
1061 return false;
1062 }
1063
1064 return true;
1065}
1066
1067int LoopInterchangeProfitability::getInstrOrderCost() {
1068 unsigned GoodOrder, BadOrder;
1069 BadOrder = GoodOrder = 0;
1070 for (BasicBlock *BB : InnerLoop->blocks()) {
1071 for (Instruction &Ins : *BB) {
1072 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1073 unsigned NumOp = GEP->getNumOperands();
1074 bool FoundInnerInduction = false;
1075 bool FoundOuterInduction = false;
1076 for (unsigned i = 0; i < NumOp; ++i) {
1077 // Skip operands that are not SCEV-able.
1078 if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1079 continue;
1080
1081 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1082 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1083 if (!AR)
1084 continue;
1085
1086 // If we find the inner induction after an outer induction e.g.
1087 // for(int i=0;i<N;i++)
1088 // for(int j=0;j<N;j++)
1089 // A[i][j] = A[i-1][j-1]+k;
1090 // then it is a good order.
1091 if (AR->getLoop() == InnerLoop) {
1092 // We found an InnerLoop induction after OuterLoop induction. It is
1093 // a good order.
1094 FoundInnerInduction = true;
1095 if (FoundOuterInduction) {
1096 GoodOrder++;
1097 break;
1098 }
1099 }
1100 // If we find the outer induction after an inner induction e.g.
1101 // for(int i=0;i<N;i++)
1102 // for(int j=0;j<N;j++)
1103 // A[j][i] = A[j-1][i-1]+k;
1104 // then it is a bad order.
1105 if (AR->getLoop() == OuterLoop) {
1106 // We found an OuterLoop induction after InnerLoop induction. It is
1107 // a bad order.
1108 FoundOuterInduction = true;
1109 if (FoundInnerInduction) {
1110 BadOrder++;
1111 break;
1112 }
1113 }
1114 }
1115 }
1116 }
1117 }
1118 return GoodOrder - BadOrder;
1119}
1120
1121static bool isProfitableForVectorization(unsigned InnerLoopId,
1122 unsigned OuterLoopId,
1123 CharMatrix &DepMatrix) {
1124 // TODO: Improve this heuristic to catch more cases.
1125 // If the inner loop is loop independent or doesn't carry any dependency it is
1126 // profitable to move this to outer position.
1127 for (auto &Row : DepMatrix) {
1128 if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1129 return false;
1130 // TODO: We need to improve this heuristic.
1131 if (Row[OuterLoopId] != '=')
1132 return false;
1133 }
1134 // If outer loop has dependence and inner loop is loop independent then it is
1135 // profitable to interchange to enable parallelism.
1136 // If there are no dependences, interchanging will not improve anything.
1137 return !DepMatrix.empty();
1138}
1139
1140bool LoopInterchangeProfitability::isProfitable(
1141 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1142 unsigned OuterLoopId, CharMatrix &DepMatrix,
1143 const DenseMap<const Loop *, unsigned> &CostMap) {
1144 // TODO: Remove the legacy cost model.
1145
1146 // This is the new cost model returned from loop cache analysis.
1147 // A smaller index means the loop should be placed an outer loop, and vice
1148 // versa.
1149 if (CostMap.find(InnerLoop) != CostMap.end() &&
1150 CostMap.find(OuterLoop) != CostMap.end()) {
1151 unsigned InnerIndex = 0, OuterIndex = 0;
1152 InnerIndex = CostMap.find(InnerLoop)->second;
1153 OuterIndex = CostMap.find(OuterLoop)->second;
1154 LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndexdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "InnerIndex = " <<
InnerIndex << ", OuterIndex = " << OuterIndex <<
"\n"; } } while (false)
1155 << ", OuterIndex = " << OuterIndex << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "InnerIndex = " <<
InnerIndex << ", OuterIndex = " << OuterIndex <<
"\n"; } } while (false)
;
1156 if (InnerIndex < OuterIndex)
1157 return true;
1158 } else {
1159 // Legacy cost model: this is rough cost estimation algorithm. It counts the
1160 // good and bad order of induction variables in the instruction and allows
1161 // reordering if number of bad orders is more than good.
1162 int Cost = getInstrOrderCost();
1163 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cost = " << Cost
<< "\n"; } } while (false)
;
1164 if (Cost < -LoopInterchangeCostThreshold)
1165 return true;
1166 }
1167
1168 // It is not profitable as per current cache profitability model. But check if
1169 // we can move this loop outside to improve parallelism.
1170 if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1171 return true;
1172
1173 ORE->emit([&]() {
1174 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable",
1175 InnerLoop->getStartLoc(),
1176 InnerLoop->getHeader())
1177 << "Interchanging loops is too costly and it does not improve "
1178 "parallelism.";
1179 });
1180 return false;
1181}
1182
1183void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1184 Loop *InnerLoop) {
1185 for (Loop *L : *OuterLoop)
1186 if (L == InnerLoop) {
1187 OuterLoop->removeChildLoop(L);
1188 return;
1189 }
1190 llvm_unreachable("Couldn't find loop")::llvm::llvm_unreachable_internal("Couldn't find loop", "llvm/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1190)
;
1191}
1192
1193/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1194/// new inner and outer loop after interchanging: NewInner is the original
1195/// outer loop and NewOuter is the original inner loop.
1196///
1197/// Before interchanging, we have the following structure
1198/// Outer preheader
1199// Outer header
1200// Inner preheader
1201// Inner header
1202// Inner body
1203// Inner latch
1204// outer bbs
1205// Outer latch
1206//
1207// After interchanging:
1208// Inner preheader
1209// Inner header
1210// Outer preheader
1211// Outer header
1212// Inner body
1213// outer bbs
1214// Outer latch
1215// Inner latch
1216void LoopInterchangeTransform::restructureLoops(
1217 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1218 BasicBlock *OrigOuterPreHeader) {
1219 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1220 // The original inner loop preheader moves from the new inner loop to
1221 // the parent loop, if there is one.
1222 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1223 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1224
1225 // Switch the loop levels.
1226 if (OuterLoopParent) {
1227 // Remove the loop from its parent loop.
1228 removeChildLoop(OuterLoopParent, NewInner);
1229 removeChildLoop(NewInner, NewOuter);
1230 OuterLoopParent->addChildLoop(NewOuter);
1231 } else {
1232 removeChildLoop(NewInner, NewOuter);
1233 LI->changeTopLevelLoop(NewInner, NewOuter);
1234 }
1235 while (!NewOuter->isInnermost())
1236 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1237 NewOuter->addChildLoop(NewInner);
1238
1239 // BBs from the original inner loop.
1240 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1241
1242 // Add BBs from the original outer loop to the original inner loop (excluding
1243 // BBs already in inner loop)
1244 for (BasicBlock *BB : NewInner->blocks())
1245 if (LI->getLoopFor(BB) == NewInner)
1246 NewOuter->addBlockEntry(BB);
1247
1248 // Now remove inner loop header and latch from the new inner loop and move
1249 // other BBs (the loop body) to the new inner loop.
1250 BasicBlock *OuterHeader = NewOuter->getHeader();
1251 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1252 for (BasicBlock *BB : OrigInnerBBs) {
1253 // Nothing will change for BBs in child loops.
1254 if (LI->getLoopFor(BB) != NewOuter)
1255 continue;
1256 // Remove the new outer loop header and latch from the new inner loop.
1257 if (BB == OuterHeader || BB == OuterLatch)
1258 NewInner->removeBlockFromLoop(BB);
1259 else
1260 LI->changeLoopFor(BB, NewInner);
1261 }
1262
1263 // The preheader of the original outer loop becomes part of the new
1264 // outer loop.
1265 NewOuter->addBlockEntry(OrigOuterPreHeader);
1266 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1267
1268 // Tell SE that we move the loops around.
1269 SE->forgetLoop(NewOuter);
1270 SE->forgetLoop(NewInner);
1271}
1272
1273bool LoopInterchangeTransform::transform() {
1274 bool Transformed = false;
1275
1276 if (InnerLoop->getSubLoops().empty()) {
1277 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1278 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Splitting the inner loop latch\n"
; } } while (false)
;
1279 auto &InductionPHIs = LIL.getInnerLoopInductions();
1280 if (InductionPHIs.empty()) {
1281 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to find the point to split loop latch \n"
; } } while (false)
;
1282 return false;
1283 }
1284
1285 SmallVector<Instruction *, 8> InnerIndexVarList;
1286 for (PHINode *CurInductionPHI : InductionPHIs) {
1287 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1288 InnerIndexVarList.push_back(
1289 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1290 else
1291 InnerIndexVarList.push_back(
1292 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1293 }
1294
1295 // Create a new latch block for the inner loop. We split at the
1296 // current latch's terminator and then move the condition and all
1297 // operands that are not either loop-invariant or the induction PHI into the
1298 // new latch block.
1299 BasicBlock *NewLatch =
1300 SplitBlock(InnerLoop->getLoopLatch(),
1301 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1302
1303 SmallSetVector<Instruction *, 4> WorkList;
1304 unsigned i = 0;
1305 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1306 for (; i < WorkList.size(); i++) {
1307 // Duplicate instruction and move it the new latch. Update uses that
1308 // have been moved.
1309 Instruction *NewI = WorkList[i]->clone();
1310 NewI->insertBefore(NewLatch->getFirstNonPHI());
1311 assert(!NewI->mayHaveSideEffects() &&(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1313, __extension__
__PRETTY_FUNCTION__))
1312 "Moving instructions with side-effects may change behavior of "(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1313, __extension__
__PRETTY_FUNCTION__))
1313 "the loop nest!")(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1313, __extension__
__PRETTY_FUNCTION__))
;
1314 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1315 Instruction *UserI = cast<Instruction>(U.getUser());
1316 if (!InnerLoop->contains(UserI->getParent()) ||
1317 UserI->getParent() == NewLatch ||
1318 llvm::is_contained(InductionPHIs, UserI))
1319 U.set(NewI);
1320 }
1321 // Add operands of moved instruction to the worklist, except if they are
1322 // outside the inner loop or are the induction PHI.
1323 for (Value *Op : WorkList[i]->operands()) {
1324 Instruction *OpI = dyn_cast<Instruction>(Op);
1325 if (!OpI ||
1326 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1327 llvm::is_contained(InductionPHIs, OpI))
1328 continue;
1329 WorkList.insert(OpI);
1330 }
1331 }
1332 };
1333
1334 // FIXME: Should we interchange when we have a constant condition?
1335 Instruction *CondI = dyn_cast<Instruction>(
1336 cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1337 ->getCondition());
1338 if (CondI)
1339 WorkList.insert(CondI);
1340 MoveInstructions();
1341 for (Instruction *InnerIndexVar : InnerIndexVarList)
1342 WorkList.insert(cast<Instruction>(InnerIndexVar));
1343 MoveInstructions();
1344 }
1345
1346 // Ensure the inner loop phi nodes have a separate basic block.
1347 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1348 if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) {
1349 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1350 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "splitting InnerLoopHeader done\n"
; } } while (false)
;
1351 }
1352
1353 // Instructions in the original inner loop preheader may depend on values
1354 // defined in the outer loop header. Move them there, because the original
1355 // inner loop preheader will become the entry into the interchanged loop nest.
1356 // Currently we move all instructions and rely on LICM to move invariant
1357 // instructions outside the loop nest.
1358 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1359 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1360 if (InnerLoopPreHeader != OuterLoopHeader) {
1361 SmallPtrSet<Instruction *, 4> NeedsMoving;
1362 for (Instruction &I :
1363 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1364 std::prev(InnerLoopPreHeader->end()))))
1365 I.moveBefore(OuterLoopHeader->getTerminator());
1366 }
1367
1368 Transformed |= adjustLoopLinks();
1369 if (!Transformed) {
1370 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopLinks failed\n"
; } } while (false)
;
1371 return false;
1372 }
1373
1374 return true;
1375}
1376
1377/// \brief Move all instructions except the terminator from FromBB right before
1378/// InsertBefore
1379static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1380 auto &ToList = InsertBefore->getParent()->getInstList();
1381 auto &FromList = FromBB->getInstList();
1382
1383 ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1384 FromBB->getTerminator()->getIterator());
1385}
1386
1387/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1388static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1389 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1390 // from BB1 afterwards.
1391 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1392 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1393 for (Instruction *I : TempInstrs)
1394 I->removeFromParent();
1395
1396 // Move instructions from BB2 to BB1.
1397 moveBBContents(BB2, BB1->getTerminator());
1398
1399 // Move instructions from TempInstrs to BB2.
1400 for (Instruction *I : TempInstrs)
1401 I->insertBefore(BB2->getTerminator());
1402}
1403
1404// Update BI to jump to NewBB instead of OldBB. Records updates to the
1405// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1406// \p OldBB is exactly once in BI's successor list.
1407static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1408 BasicBlock *NewBB,
1409 std::vector<DominatorTree::UpdateType> &DTUpdates,
1410 bool MustUpdateOnce = true) {
1411 assert((!MustUpdateOnce ||(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1415, __extension__
__PRETTY_FUNCTION__))
1412 llvm::count_if(successors(BI),(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1415, __extension__
__PRETTY_FUNCTION__))
1413 [OldBB](BasicBlock *BB) {(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1415, __extension__
__PRETTY_FUNCTION__))
1414 return BB == OldBB;(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1415, __extension__
__PRETTY_FUNCTION__))
1415 }) == 1) && "BI must jump to OldBB exactly once.")(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1415, __extension__
__PRETTY_FUNCTION__))
;
1416 bool Changed = false;
1417 for (Use &Op : BI->operands())
1418 if (Op == OldBB) {
1419 Op.set(NewBB);
1420 Changed = true;
1421 }
1422
1423 if (Changed) {
1424 DTUpdates.push_back(
1425 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1426 DTUpdates.push_back(
1427 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1428 }
1429 assert(Changed && "Expected a successor to be updated")(static_cast <bool> (Changed && "Expected a successor to be updated"
) ? void (0) : __assert_fail ("Changed && \"Expected a successor to be updated\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1429, __extension__
__PRETTY_FUNCTION__))
;
1430}
1431
1432// Move Lcssa PHIs to the right place.
1433static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1434 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1435 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1436 Loop *InnerLoop, LoopInfo *LI) {
1437
1438 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1439 // defined either in the header or latch. Those blocks will become header and
1440 // latch of the new outer loop, and the only possible users can PHI nodes
1441 // in the exit block of the loop nest or the outer loop header (reduction
1442 // PHIs, in that case, the incoming value must be defined in the inner loop
1443 // header). We can just substitute the user with the incoming value and remove
1444 // the PHI.
1445 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1446 assert(P.getNumIncomingValues() == 1 &&(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1447, __extension__
__PRETTY_FUNCTION__))
1447 "Only loops with a single exit are supported!")(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1447, __extension__
__PRETTY_FUNCTION__))
;
1448
1449 // Incoming values are guaranteed be instructions currently.
1450 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1451 // In case of multi-level nested loops, follow LCSSA to find the incoming
1452 // value defined from the innermost loop.
1453 auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1454 // Skip phis with incoming values from the inner loop body, excluding the
1455 // header and latch.
1456 if (IncIInnerMost->getParent() != InnerLatch &&
1457 IncIInnerMost->getParent() != InnerHeader)
1458 continue;
1459
1460 assert(all_of(P.users(),(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1468, __extension__
__PRETTY_FUNCTION__))
1461 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1468, __extension__
__PRETTY_FUNCTION__))
1462 return (cast<PHINode>(U)->getParent() == OuterHeader &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1468, __extension__
__PRETTY_FUNCTION__))
1463 IncI->getParent() == InnerHeader) ||(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1468, __extension__
__PRETTY_FUNCTION__))
1464 cast<PHINode>(U)->getParent() == OuterExit;(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1468, __extension__
__PRETTY_FUNCTION__))
1465 }) &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1468, __extension__
__PRETTY_FUNCTION__))
1466 "Can only replace phis iff the uses are in the loop nest exit or "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1468, __extension__
__PRETTY_FUNCTION__))
1467 "the incoming value is defined in the inner header (it will "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1468, __extension__
__PRETTY_FUNCTION__))
1468 "dominate all loop blocks after interchanging)")(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1468, __extension__
__PRETTY_FUNCTION__))
;
1469 P.replaceAllUsesWith(IncI);
1470 P.eraseFromParent();
1471 }
1472
1473 SmallVector<PHINode *, 8> LcssaInnerExit;
1474 for (PHINode &P : InnerExit->phis())
1475 LcssaInnerExit.push_back(&P);
1476
1477 SmallVector<PHINode *, 8> LcssaInnerLatch;
1478 for (PHINode &P : InnerLatch->phis())
1479 LcssaInnerLatch.push_back(&P);
1480
1481 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1482 // If a PHI node has users outside of InnerExit, it has a use outside the
1483 // interchanged loop and we have to preserve it. We move these to
1484 // InnerLatch, which will become the new exit block for the innermost
1485 // loop after interchanging.
1486 for (PHINode *P : LcssaInnerExit)
1487 P->moveBefore(InnerLatch->getFirstNonPHI());
1488
1489 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1490 // and we have to move them to the new inner latch.
1491 for (PHINode *P : LcssaInnerLatch)
1492 P->moveBefore(InnerExit->getFirstNonPHI());
1493
1494 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1495 // incoming values defined in the outer loop, we have to add a new PHI
1496 // in the inner loop latch, which became the exit block of the outer loop,
1497 // after interchanging.
1498 if (OuterExit) {
1499 for (PHINode &P : OuterExit->phis()) {
1500 if (P.getNumIncomingValues() != 1)
1501 continue;
1502 // Skip Phis with incoming values defined in the inner loop. Those should
1503 // already have been updated.
1504 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1505 if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1506 continue;
1507
1508 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1509 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1510 NewPhi->setIncomingBlock(0, OuterLatch);
1511 // We might have incoming edges from other BBs, i.e., the original outer
1512 // header.
1513 for (auto *Pred : predecessors(InnerLatch)) {
1514 if (Pred == OuterLatch)
1515 continue;
1516 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1517 }
1518 NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1519 P.setIncomingValue(0, NewPhi);
1520 }
1521 }
1522
1523 // Now adjust the incoming blocks for the LCSSA PHIs.
1524 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1525 // with the new latch.
1526 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1527}
1528
1529bool LoopInterchangeTransform::adjustLoopBranches() {
1530 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopBranches called\n"
; } } while (false)
;
1531 std::vector<DominatorTree::UpdateType> DTUpdates;
1532
1533 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1534 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1535
1536 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1538, __extension__
__PRETTY_FUNCTION__))
1537 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1538, __extension__
__PRETTY_FUNCTION__))
1538 InnerLoopPreHeader && "Guaranteed by loop-simplify form")(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1538, __extension__
__PRETTY_FUNCTION__))
;
1539 // Ensure that both preheaders do not contain PHI nodes and have single
1540 // predecessors. This allows us to move them easily. We use
1541 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1542 // preheaders do not satisfy those conditions.
1543 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1544 !OuterLoopPreHeader->getUniquePredecessor())
1545 OuterLoopPreHeader =
1546 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1547 if (InnerLoopPreHeader == OuterLoop->getHeader())
1548 InnerLoopPreHeader =
1549 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1550
1551 // Adjust the loop preheader
1552 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1553 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1554 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1555 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1556 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1557 BasicBlock *InnerLoopLatchPredecessor =
1558 InnerLoopLatch->getUniquePredecessor();
1559 BasicBlock *InnerLoopLatchSuccessor;
1560 BasicBlock *OuterLoopLatchSuccessor;
1561
1562 BranchInst *OuterLoopLatchBI =
1563 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1564 BranchInst *InnerLoopLatchBI =
1565 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1566 BranchInst *OuterLoopHeaderBI =
1567 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1568 BranchInst *InnerLoopHeaderBI =
1569 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1570
1571 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1572 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1573 !InnerLoopHeaderBI)
1574 return false;
1575
1576 BranchInst *InnerLoopLatchPredecessorBI =
1577 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1578 BranchInst *OuterLoopPredecessorBI =
1579 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1580
1581 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1582 return false;
1583 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1584 if (!InnerLoopHeaderSuccessor)
1585 return false;
1586
1587 // Adjust Loop Preheader and headers.
1588 // The branches in the outer loop predecessor and the outer loop header can
1589 // be unconditional branches or conditional branches with duplicates. Consider
1590 // this when updating the successors.
1591 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1592 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1593 // The outer loop header might or might not branch to the outer latch.
1594 // We are guaranteed to branch to the inner loop preheader.
1595 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1596 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1597 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1598 DTUpdates,
1599 /*MustUpdateOnce=*/false);
1600 }
1601 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1602 InnerLoopHeaderSuccessor, DTUpdates,
1603 /*MustUpdateOnce=*/false);
1604
1605 // Adjust reduction PHI's now that the incoming block has changed.
1606 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1607 OuterLoopHeader);
1608
1609 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1610 OuterLoopPreHeader, DTUpdates);
1611
1612 // -------------Adjust loop latches-----------
1613 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1614 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1615 else
1616 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1617
1618 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1619 InnerLoopLatchSuccessor, DTUpdates);
1620
1621 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1622 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1623 else
1624 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1625
1626 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1627 OuterLoopLatchSuccessor, DTUpdates);
1628 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1629 DTUpdates);
1630
1631 DT->applyUpdates(DTUpdates);
1632 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1633 OuterLoopPreHeader);
1634
1635 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1636 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1637 InnerLoop, LI);
1638 // For PHIs in the exit block of the outer loop, outer's latch has been
1639 // replaced by Inners'.
1640 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1641
1642 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1643 // Now update the reduction PHIs in the inner and outer loop headers.
1644 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1645 for (PHINode &PHI : InnerLoopHeader->phis())
1646 if (OuterInnerReductions.contains(&PHI))
1647 InnerLoopPHIs.push_back(&PHI);
1648
1649 for (PHINode &PHI : OuterLoopHeader->phis())
1650 if (OuterInnerReductions.contains(&PHI))
1651 OuterLoopPHIs.push_back(&PHI);
1652
1653 // Now move the remaining reduction PHIs from outer to inner loop header and
1654 // vice versa. The PHI nodes must be part of a reduction across the inner and
1655 // outer loop and all the remains to do is and updating the incoming blocks.
1656 for (PHINode *PHI : OuterLoopPHIs) {
1657 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Outer loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1658 PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1659 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1659, __extension__
__PRETTY_FUNCTION__))
;
1660 }
1661 for (PHINode *PHI : InnerLoopPHIs) {
1662 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1663 PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1664 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1664, __extension__
__PRETTY_FUNCTION__))
;
1665 }
1666
1667 // Update the incoming blocks for moved PHI nodes.
1668 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1669 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1670 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1671 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1672
1673 // Values defined in the outer loop header could be used in the inner loop
1674 // latch. In that case, we need to create LCSSA phis for them, because after
1675 // interchanging they will be defined in the new inner loop and used in the
1676 // new outer loop.
1677 IRBuilder<> Builder(OuterLoopHeader->getContext());
1678 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1679 for (Instruction &I :
1680 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1681 MayNeedLCSSAPhis.push_back(&I);
1682 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1683
1684 return true;
1685}
1686
1687bool LoopInterchangeTransform::adjustLoopLinks() {
1688 // Adjust all branches in the inner and outer loop.
1689 bool Changed = adjustLoopBranches();
1690 if (Changed) {
1691 // We have interchanged the preheaders so we need to interchange the data in
1692 // the preheaders as well. This is because the content of the inner
1693 // preheader was previously executed inside the outer loop.
1694 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1695 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1696 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1697 }
1698 return Changed;
1699}
1700
1701namespace {
1702/// Main LoopInterchange Pass.
1703struct LoopInterchangeLegacyPass : public LoopPass {
1704 static char ID;
1705
1706 LoopInterchangeLegacyPass() : LoopPass(ID) {
1707 initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
1708 }
1709
1710 void getAnalysisUsage(AnalysisUsage &AU) const override {
1711 AU.addRequired<DependenceAnalysisWrapperPass>();
1712 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1713
1714 getLoopAnalysisUsage(AU);
1715 }
1716
1717 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1718 if (skipLoop(L))
1719 return false;
1720
1721 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1722 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1723 auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1724 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1725 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1726 std::unique_ptr<CacheCost> CC = nullptr;
1727 return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L);
1728 }
1729};
1730} // namespace
1731
1732char LoopInterchangeLegacyPass::ID = 0;
1733
1734INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1735 "Interchanges loops for cache reuse", false, false)static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1736INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
1737INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)initializeDependenceAnalysisWrapperPassPass(Registry);
1738INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)initializeOptimizationRemarkEmitterWrapperPassPass(Registry);
1739
1740INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1741 "Interchanges loops for cache reuse", false, false)PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1742
1743Pass *llvm::createLoopInterchangePass() {
1744 return new LoopInterchangeLegacyPass();
1745}
1746
1747PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1748 LoopAnalysisManager &AM,
1749 LoopStandardAnalysisResults &AR,
1750 LPMUpdater &U) {
1751 Function &F = *LN.getParent();
1752
1753 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1754 std::unique_ptr<CacheCost> CC =
1755 CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
1756 OptimizationRemarkEmitter ORE(&F);
1757 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
1758 return PreservedAnalyses::all();
1759 U.markLoopNestChanged(true);
1760 return getLoopPassPreservedAnalyses();
1761}