Bug Summary

File:llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Warning:line 1481, column 7
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LoopInterchange.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/llvm/lib/Transforms/Scalar -I include -I /build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-26-233846-219801-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Scalar/LoopInterchange.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Analysis/DependenceAnalysis.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/LoopNestAnalysis.h"
23#include "llvm/Analysis/LoopPass.h"
24#include "llvm/Analysis/OptimizationRemarkEmitter.h"
25#include "llvm/Analysis/ScalarEvolution.h"
26#include "llvm/Analysis/ScalarEvolutionExpressions.h"
27#include "llvm/IR/BasicBlock.h"
28#include "llvm/IR/Constants.h"
29#include "llvm/IR/DiagnosticInfo.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/IRBuilder.h"
33#include "llvm/IR/InstrTypes.h"
34#include "llvm/IR/Instruction.h"
35#include "llvm/IR/Instructions.h"
36#include "llvm/IR/Type.h"
37#include "llvm/IR/User.h"
38#include "llvm/IR/Value.h"
39#include "llvm/InitializePasses.h"
40#include "llvm/Pass.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/raw_ostream.h"
46#include "llvm/Transforms/Scalar.h"
47#include "llvm/Transforms/Utils.h"
48#include "llvm/Transforms/Utils/BasicBlockUtils.h"
49#include "llvm/Transforms/Utils/LoopUtils.h"
50#include <cassert>
51#include <utility>
52#include <vector>
53
54using namespace llvm;
55
56#define DEBUG_TYPE"loop-interchange" "loop-interchange"
57
58STATISTIC(LoopsInterchanged, "Number of loops interchanged")static llvm::Statistic LoopsInterchanged = {"loop-interchange"
, "LoopsInterchanged", "Number of loops interchanged"}
;
59
60static cl::opt<int> LoopInterchangeCostThreshold(
61 "loop-interchange-threshold", cl::init(0), cl::Hidden,
62 cl::desc("Interchange if you gain more than this number"));
63
64namespace {
65
66using LoopVector = SmallVector<Loop *, 8>;
67
68// TODO: Check if we can use a sparse matrix here.
69using CharMatrix = std::vector<std::vector<char>>;
70
71} // end anonymous namespace
72
73// Maximum number of dependencies that can be handled in the dependency matrix.
74static const unsigned MaxMemInstrCount = 100;
75
76// Maximum loop depth supported.
77static const unsigned MaxLoopNestDepth = 10;
78
79#ifdef DUMP_DEP_MATRICIES
80static void printDepMatrix(CharMatrix &DepMatrix) {
81 for (auto &Row : DepMatrix) {
82 for (auto D : Row)
83 LLVM_DEBUG(dbgs() << D << " ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << D << " "; } } while
(false)
;
84 LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "\n"; } } while (false
)
;
85 }
86}
87#endif
88
89static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
90 Loop *L, DependenceInfo *DI) {
91 using ValueVector = SmallVector<Value *, 16>;
92
93 ValueVector MemInstr;
94
95 // For each block.
96 for (BasicBlock *BB : L->blocks()) {
97 // Scan the BB and collect legal loads and stores.
98 for (Instruction &I : *BB) {
99 if (!isa<Instruction>(I))
100 return false;
101 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
102 if (!Ld->isSimple())
103 return false;
104 MemInstr.push_back(&I);
105 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
106 if (!St->isSimple())
107 return false;
108 MemInstr.push_back(&I);
109 }
110 }
111 }
112
113 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
114 << " Loads and Stores to analyze\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
;
115
116 ValueVector::iterator I, IE, J, JE;
117
118 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
119 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
120 std::vector<char> Dep;
121 Instruction *Src = cast<Instruction>(*I);
122 Instruction *Dst = cast<Instruction>(*J);
123 if (Src == Dst)
124 continue;
125 // Ignore Input dependencies.
126 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
127 continue;
128 // Track Output, Flow, and Anti dependencies.
129 if (auto D = DI->depends(Src, Dst, true)) {
130 assert(D->isOrdered() && "Expected an output, flow or anti dep.")(static_cast <bool> (D->isOrdered() && "Expected an output, flow or anti dep."
) ? void (0) : __assert_fail ("D->isOrdered() && \"Expected an output, flow or anti dep.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 130, __extension__
__PRETTY_FUNCTION__))
;
131 LLVM_DEBUG(StringRef DepType =do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
132 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
133 dbgs() << "Found " << DepTypedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
134 << " dependency between Src and Dst\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
135 << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
;
136 unsigned Levels = D->getLevels();
137 char Direction;
138 for (unsigned II = 1; II <= Levels; ++II) {
139 const SCEV *Distance = D->getDistance(II);
140 const SCEVConstant *SCEVConst =
141 dyn_cast_or_null<SCEVConstant>(Distance);
142 if (SCEVConst) {
143 const ConstantInt *CI = SCEVConst->getValue();
144 if (CI->isNegative())
145 Direction = '<';
146 else if (CI->isZero())
147 Direction = '=';
148 else
149 Direction = '>';
150 Dep.push_back(Direction);
151 } else if (D->isScalar(II)) {
152 Direction = 'S';
153 Dep.push_back(Direction);
154 } else {
155 unsigned Dir = D->getDirection(II);
156 if (Dir == Dependence::DVEntry::LT ||
157 Dir == Dependence::DVEntry::LE)
158 Direction = '<';
159 else if (Dir == Dependence::DVEntry::GT ||
160 Dir == Dependence::DVEntry::GE)
161 Direction = '>';
162 else if (Dir == Dependence::DVEntry::EQ)
163 Direction = '=';
164 else
165 Direction = '*';
166 Dep.push_back(Direction);
167 }
168 }
169 while (Dep.size() != Level) {
170 Dep.push_back('I');
171 }
172
173 DepMatrix.push_back(Dep);
174 if (DepMatrix.size() > MaxMemInstrCount) {
175 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
176 << " dependencies inside loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
;
177 return false;
178 }
179 }
180 }
181 }
182
183 return true;
184}
185
186// A loop is moved from index 'from' to an index 'to'. Update the Dependence
187// matrix by exchanging the two columns.
188static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
189 unsigned ToIndx) {
190 for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
191 std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
192}
193
194// Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
195// '>'
196static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
197 unsigned Column) {
198 for (unsigned i = 0; i <= Column; ++i) {
199 if (DepMatrix[Row][i] == '<')
200 return false;
201 if (DepMatrix[Row][i] == '>')
202 return true;
203 }
204 // All dependencies were '=','S' or 'I'
205 return false;
206}
207
208// Checks if no dependence exist in the dependency matrix in Row before Column.
209static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
210 unsigned Column) {
211 for (unsigned i = 0; i < Column; ++i) {
212 if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
213 DepMatrix[Row][i] != 'I')
214 return false;
215 }
216 return true;
217}
218
219static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
220 unsigned OuterLoopId, char InnerDep,
221 char OuterDep) {
222 if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
223 return false;
224
225 if (InnerDep == OuterDep)
226 return true;
227
228 // It is legal to interchange if and only if after interchange no row has a
229 // '>' direction as the leftmost non-'='.
230
231 if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
232 return true;
233
234 if (InnerDep == '<')
235 return true;
236
237 if (InnerDep == '>') {
238 // If OuterLoopId represents outermost loop then interchanging will make the
239 // 1st dependency as '>'
240 if (OuterLoopId == 0)
241 return false;
242
243 // If all dependencies before OuterloopId are '=','S'or 'I'. Then
244 // interchanging will result in this row having an outermost non '='
245 // dependency of '>'
246 if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
247 return true;
248 }
249
250 return false;
251}
252
253// Checks if it is legal to interchange 2 loops.
254// [Theorem] A permutation of the loops in a perfect nest is legal if and only
255// if the direction matrix, after the same permutation is applied to its
256// columns, has no ">" direction as the leftmost non-"=" direction in any row.
257static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
258 unsigned InnerLoopId,
259 unsigned OuterLoopId) {
260 unsigned NumRows = DepMatrix.size();
261 // For each row check if it is valid to interchange.
262 for (unsigned Row = 0; Row < NumRows; ++Row) {
263 char InnerDep = DepMatrix[Row][InnerLoopId];
264 char OuterDep = DepMatrix[Row][OuterLoopId];
265 if (InnerDep == '*' || OuterDep == '*')
266 return false;
267 if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
268 return false;
269 }
270 return true;
271}
272
273static LoopVector populateWorklist(Loop &L) {
274 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
275 << L.getHeader()->getParent()->getName() << " Loop: %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
276 << L.getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
;
277 LoopVector LoopList;
278 Loop *CurrentLoop = &L;
279 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
280 while (!Vec->empty()) {
281 // The current loop has multiple subloops in it hence it is not tightly
282 // nested.
283 // Discard all loops above it added into Worklist.
284 if (Vec->size() != 1)
285 return {};
286
287 LoopList.push_back(CurrentLoop);
288 CurrentLoop = Vec->front();
289 Vec = &CurrentLoop->getSubLoops();
290 }
291 LoopList.push_back(CurrentLoop);
292 return LoopList;
293}
294
295namespace {
296
297/// LoopInterchangeLegality checks if it is legal to interchange the loop.
298class LoopInterchangeLegality {
299public:
300 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
301 OptimizationRemarkEmitter *ORE)
302 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
303
304 /// Check if the loops can be interchanged.
305 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
306 CharMatrix &DepMatrix);
307
308 /// Discover induction PHIs in the header of \p L. Induction
309 /// PHIs are added to \p Inductions.
310 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
311
312 /// Check if the loop structure is understood. We do not handle triangular
313 /// loops for now.
314 bool isLoopStructureUnderstood();
315
316 bool currentLimitations();
317
318 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
319 return OuterInnerReductions;
320 }
321
322 const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
323 return InnerLoopInductions;
324 }
325
326private:
327 bool tightlyNested(Loop *Outer, Loop *Inner);
328 bool containsUnsafeInstructions(BasicBlock *BB);
329
330 /// Discover induction and reduction PHIs in the header of \p L. Induction
331 /// PHIs are added to \p Inductions, reductions are added to
332 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
333 /// to be passed as \p InnerLoop.
334 bool findInductionAndReductions(Loop *L,
335 SmallVector<PHINode *, 8> &Inductions,
336 Loop *InnerLoop);
337
338 Loop *OuterLoop;
339 Loop *InnerLoop;
340
341 ScalarEvolution *SE;
342
343 /// Interface to emit optimization remarks.
344 OptimizationRemarkEmitter *ORE;
345
346 /// Set of reduction PHIs taking part of a reduction across the inner and
347 /// outer loop.
348 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
349
350 /// Set of inner loop induction PHIs
351 SmallVector<PHINode *, 8> InnerLoopInductions;
352};
353
354/// LoopInterchangeProfitability checks if it is profitable to interchange the
355/// loop.
356class LoopInterchangeProfitability {
357public:
358 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
359 OptimizationRemarkEmitter *ORE)
360 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
361
362 /// Check if the loop interchange is profitable.
363 bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
364 CharMatrix &DepMatrix);
365
366private:
367 int getInstrOrderCost();
368
369 Loop *OuterLoop;
370 Loop *InnerLoop;
371
372 /// Scev analysis.
373 ScalarEvolution *SE;
374
375 /// Interface to emit optimization remarks.
376 OptimizationRemarkEmitter *ORE;
377};
378
379/// LoopInterchangeTransform interchanges the loop.
380class LoopInterchangeTransform {
381public:
382 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
383 LoopInfo *LI, DominatorTree *DT,
384 const LoopInterchangeLegality &LIL)
385 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
386
387 /// Interchange OuterLoop and InnerLoop.
388 bool transform();
389 void restructureLoops(Loop *NewInner, Loop *NewOuter,
390 BasicBlock *OrigInnerPreHeader,
391 BasicBlock *OrigOuterPreHeader);
392 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
393
394private:
395 bool adjustLoopLinks();
396 bool adjustLoopBranches();
397
398 Loop *OuterLoop;
399 Loop *InnerLoop;
400
401 /// Scev analysis.
402 ScalarEvolution *SE;
403
404 LoopInfo *LI;
405 DominatorTree *DT;
406
407 const LoopInterchangeLegality &LIL;
408};
409
410struct LoopInterchange {
411 ScalarEvolution *SE = nullptr;
412 LoopInfo *LI = nullptr;
413 DependenceInfo *DI = nullptr;
414 DominatorTree *DT = nullptr;
415
416 /// Interface to emit optimization remarks.
417 OptimizationRemarkEmitter *ORE;
418
419 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
420 DominatorTree *DT, OptimizationRemarkEmitter *ORE)
421 : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
422
423 bool run(Loop *L) {
424 if (L->getParentLoop())
425 return false;
426
427 return processLoopList(populateWorklist(*L));
428 }
429
430 bool run(LoopNest &LN) {
431 const auto &LoopList = LN.getLoops();
432 for (unsigned I = 1; I < LoopList.size(); ++I)
433 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
434 return false;
435 return processLoopList(LoopList);
436 }
437
438 bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
439 for (Loop *L : LoopList) {
440 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
441 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
442 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Couldn't compute backedge count\n"
; } } while (false)
;
443 return false;
444 }
445 if (L->getNumBackEdges() != 1) {
446 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "NumBackEdges is not equal to 1\n"
; } } while (false)
;
447 return false;
448 }
449 if (!L->getExitingBlock()) {
450 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't have unique exit block\n"
; } } while (false)
;
451 return false;
452 }
453 }
454 return true;
455 }
456
457 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
458 // TODO: Add a better heuristic to select the loop to be interchanged based
459 // on the dependence matrix. Currently we select the innermost loop.
460 return LoopList.size() - 1;
461 }
462
463 bool processLoopList(ArrayRef<Loop *> LoopList) {
464 bool Changed = false;
465 unsigned LoopNestDepth = LoopList.size();
466 if (LoopNestDepth < 2) {
467 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't contain minimum nesting level.\n"
; } } while (false)
;
468 return false;
469 }
470 if (LoopNestDepth > MaxLoopNestDepth) {
471 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
472 << MaxLoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
;
473 return false;
474 }
475 if (!isComputableLoopNest(LoopList)) {
476 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not valid loop candidate for interchange\n"
; } } while (false)
;
477 return false;
478 }
479
480 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepthdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
481 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
;
482
483 CharMatrix DependencyMatrix;
484 Loop *OuterMostLoop = *(LoopList.begin());
485 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
486 OuterMostLoop, DI)) {
487 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Populating dependency matrix failed\n"
; } } while (false)
;
488 return false;
489 }
490#ifdef DUMP_DEP_MATRICIES
491 LLVM_DEBUG(dbgs() << "Dependence before interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence before interchange\n"
; } } while (false)
;
492 printDepMatrix(DependencyMatrix);
493#endif
494
495 // Get the Outermost loop exit.
496 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
497 if (!LoopNestExit) {
498 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "OuterMostLoop needs an unique exit block"
; } } while (false)
;
499 return false;
500 }
501
502 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
503 // Move the selected loop outwards to the best possible position.
504 Loop *LoopToBeInterchanged = LoopList[SelecLoopId];
505 for (unsigned i = SelecLoopId; i > 0; i--) {
506 bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i,
507 i - 1, DependencyMatrix);
508 if (!Interchanged)
509 return Changed;
510 // Update the DependencyMatrix
511 interChangeDependencies(DependencyMatrix, i, i - 1);
512#ifdef DUMP_DEP_MATRICIES
513 LLVM_DEBUG(dbgs() << "Dependence after interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence after interchange\n"
; } } while (false)
;
514 printDepMatrix(DependencyMatrix);
515#endif
516 Changed |= Interchanged;
517 }
518 return Changed;
519 }
520
521 bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
522 unsigned OuterLoopId,
523 std::vector<std::vector<char>> &DependencyMatrix) {
524 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
525 << " and OuterLoopId = " << OuterLoopId << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
;
526 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
527 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
528 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not interchanging loops. Cannot prove legality.\n"
; } } while (false)
;
529 return false;
530 }
531 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are legal to interchange\n"
; } } while (false)
;
532 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
533 if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
534 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Interchanging loops not profitable.\n"
; } } while (false)
;
535 return false;
536 }
537
538 ORE->emit([&]() {
539 return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged",
540 InnerLoop->getStartLoc(),
541 InnerLoop->getHeader())
542 << "Loop interchanged with enclosing loop.";
543 });
544
545 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
546 LIT.transform();
547 LLVM_DEBUG(dbgs() << "Loops interchanged.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops interchanged.\n"
; } } while (false)
;
548 LoopsInterchanged++;
549
550 assert(InnerLoop->isLCSSAForm(*DT) &&(static_cast <bool> (InnerLoop->isLCSSAForm(*DT) &&
"Inner loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("InnerLoop->isLCSSAForm(*DT) && \"Inner loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 551, __extension__
__PRETTY_FUNCTION__))
551 "Inner loop not left in LCSSA form after loop interchange!")(static_cast <bool> (InnerLoop->isLCSSAForm(*DT) &&
"Inner loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("InnerLoop->isLCSSAForm(*DT) && \"Inner loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 551, __extension__
__PRETTY_FUNCTION__))
;
552 assert(OuterLoop->isLCSSAForm(*DT) &&(static_cast <bool> (OuterLoop->isLCSSAForm(*DT) &&
"Outer loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("OuterLoop->isLCSSAForm(*DT) && \"Outer loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 553, __extension__
__PRETTY_FUNCTION__))
553 "Outer loop not left in LCSSA form after loop interchange!")(static_cast <bool> (OuterLoop->isLCSSAForm(*DT) &&
"Outer loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("OuterLoop->isLCSSAForm(*DT) && \"Outer loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 553, __extension__
__PRETTY_FUNCTION__))
;
554
555 return true;
556 }
557};
558
559} // end anonymous namespace
560
561bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
562 return any_of(*BB, [](const Instruction &I) {
563 return I.mayHaveSideEffects() || I.mayReadFromMemory();
564 });
565}
566
567bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
568 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
569 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
570 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
571
572 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking if loops are tightly nested\n"
; } } while (false)
;
573
574 // A perfectly nested loop will not have any branch in between the outer and
575 // inner block i.e. outer header will branch to either inner preheader and
576 // outerloop latch.
577 BranchInst *OuterLoopHeaderBI =
578 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
579 if (!OuterLoopHeaderBI)
580 return false;
581
582 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
583 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
584 Succ != OuterLoopLatch)
585 return false;
586
587 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking instructions in Loop header and Loop latch\n"
; } } while (false)
;
588 // We do not have any basic block in between now make sure the outer header
589 // and outer loop latch doesn't contain any unsafe instructions.
590 if (containsUnsafeInstructions(OuterLoopHeader) ||
591 containsUnsafeInstructions(OuterLoopLatch))
592 return false;
593
594 // Also make sure the inner loop preheader does not contain any unsafe
595 // instructions. Note that all instructions in the preheader will be moved to
596 // the outer loop header when interchanging.
597 if (InnerLoopPreHeader != OuterLoopHeader &&
598 containsUnsafeInstructions(InnerLoopPreHeader))
599 return false;
600
601 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
602 // Ensure the inner loop exit block flows to the outer loop latch possibly
603 // through empty blocks.
604 const BasicBlock &SuccInner =
605 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
606 if (&SuccInner != OuterLoopLatch) {
607 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExitdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
608 << " does not lead to the outer loop latch.\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
;
609 return false;
610 }
611 // The inner loop exit block does flow to the outer loop latch and not some
612 // other BBs, now make sure it contains safe instructions, since it will be
613 // moved into the (new) inner loop after interchange.
614 if (containsUnsafeInstructions(InnerLoopExit))
615 return false;
616
617 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are perfectly nested\n"
; } } while (false)
;
618 // We have a perfect loop nest.
619 return true;
620}
621
622bool LoopInterchangeLegality::isLoopStructureUnderstood() {
623 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
624 for (PHINode *InnerInduction : InnerLoopInductions) {
625 unsigned Num = InnerInduction->getNumOperands();
626 for (unsigned i = 0; i < Num; ++i) {
627 Value *Val = InnerInduction->getOperand(i);
628 if (isa<Constant>(Val))
629 continue;
630 Instruction *I = dyn_cast<Instruction>(Val);
631 if (!I)
632 return false;
633 // TODO: Handle triangular loops.
634 // e.g. for(int i=0;i<N;i++)
635 // for(int j=i;j<N;j++)
636 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
637 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
638 InnerLoopPreheader &&
639 !OuterLoop->isLoopInvariant(I)) {
640 return false;
641 }
642 }
643 }
644
645 // TODO: Handle triangular loops of another form.
646 // e.g. for(int i=0;i<N;i++)
647 // for(int j=0;j<i;j++)
648 // or,
649 // for(int i=0;i<N;i++)
650 // for(int j=0;j*i<N;j++)
651 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
652 BranchInst *InnerLoopLatchBI =
653 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
654 if (!InnerLoopLatchBI->isConditional())
655 return false;
656 if (CmpInst *InnerLoopCmp =
657 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
658 Value *Op0 = InnerLoopCmp->getOperand(0);
659 Value *Op1 = InnerLoopCmp->getOperand(1);
660
661 // LHS and RHS of the inner loop exit condition, e.g.,
662 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
663 Value *Left = nullptr;
664 Value *Right = nullptr;
665
666 // Check if V only involves inner loop induction variable.
667 // Return true if V is InnerInduction, or a cast from
668 // InnerInduction, or a binary operator that involves
669 // InnerInduction and a constant.
670 std::function<bool(Value *)> IsPathToInnerIndVar;
671 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
672 if (llvm::is_contained(InnerLoopInductions, V))
673 return true;
674 if (isa<Constant>(V))
675 return true;
676 const Instruction *I = dyn_cast<Instruction>(V);
677 if (!I)
678 return false;
679 if (isa<CastInst>(I))
680 return IsPathToInnerIndVar(I->getOperand(0));
681 if (isa<BinaryOperator>(I))
682 return IsPathToInnerIndVar(I->getOperand(0)) &&
683 IsPathToInnerIndVar(I->getOperand(1));
684 return false;
685 };
686
687 // In case of multiple inner loop indvars, it is okay if LHS and RHS
688 // are both inner indvar related variables.
689 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
690 return true;
691
692 // Otherwise we check if the cmp instruction compares an inner indvar
693 // related variable (Left) with a outer loop invariant (Right).
694 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
695 Left = Op0;
696 Right = Op1;
697 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
698 Left = Op1;
699 Right = Op0;
700 }
701
702 if (Left == nullptr)
703 return false;
704
705 const SCEV *S = SE->getSCEV(Right);
706 if (!SE->isLoopInvariant(S, OuterLoop))
707 return false;
708 }
709
710 return true;
711}
712
713// If SV is a LCSSA PHI node with a single incoming value, return the incoming
714// value.
715static Value *followLCSSA(Value *SV) {
716 PHINode *PHI = dyn_cast<PHINode>(SV);
717 if (!PHI)
718 return SV;
719
720 if (PHI->getNumIncomingValues() != 1)
721 return SV;
722 return followLCSSA(PHI->getIncomingValue(0));
723}
724
725// Check V's users to see if it is involved in a reduction in L.
726static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
727 // Reduction variables cannot be constants.
728 if (isa<Constant>(V))
729 return nullptr;
730
731 for (Value *User : V->users()) {
732 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
733 if (PHI->getNumIncomingValues() == 1)
734 continue;
735 RecurrenceDescriptor RD;
736 if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
737 return PHI;
738 return nullptr;
739 }
740 }
741
742 return nullptr;
743}
744
745bool LoopInterchangeLegality::findInductionAndReductions(
746 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
747 if (!L->getLoopLatch() || !L->getLoopPredecessor())
748 return false;
749 for (PHINode &PHI : L->getHeader()->phis()) {
750 RecurrenceDescriptor RD;
751 InductionDescriptor ID;
752 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
753 Inductions.push_back(&PHI);
754 else {
755 // PHIs in inner loops need to be part of a reduction in the outer loop,
756 // discovered when checking the PHIs of the outer loop earlier.
757 if (!InnerLoop) {
758 if (!OuterInnerReductions.count(&PHI)) {
759 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
760 "across the outer loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
;
761 return false;
762 }
763 } else {
764 assert(PHI.getNumIncomingValues() == 2 &&(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 765, __extension__
__PRETTY_FUNCTION__))
765 "Phis in loop header should have exactly 2 incoming values")(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 765, __extension__
__PRETTY_FUNCTION__))
;
766 // Check if we have a PHI node in the outer loop that has a reduction
767 // result from the inner loop as an incoming value.
768 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
769 PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
770 if (!InnerRedPhi ||
771 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
772 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
773 dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
774 << "Failed to recognize PHI as an induction or reduction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
;
775 return false;
776 }
777 OuterInnerReductions.insert(&PHI);
778 OuterInnerReductions.insert(InnerRedPhi);
779 }
780 }
781 }
782 return true;
783}
784
785// This function indicates the current limitations in the transform as a result
786// of which we do not proceed.
787bool LoopInterchangeLegality::currentLimitations() {
788 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
789
790 // transform currently expects the loop latches to also be the exiting
791 // blocks.
792 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
793 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
794 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
795 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
796 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
797 dbgs() << "Loops where the latch is not the exiting block are not"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
798 << " supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
;
799 ORE->emit([&]() {
800 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ExitingNotLatch",
801 OuterLoop->getStartLoc(),
802 OuterLoop->getHeader())
803 << "Loops where the latch is not the exiting block cannot be"
804 " interchange currently.";
805 });
806 return true;
807 }
808
809 SmallVector<PHINode *, 8> Inductions;
810 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
811 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
812 dbgs() << "Only outer loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
813 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
814 ORE->emit([&]() {
815 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter",
816 OuterLoop->getStartLoc(),
817 OuterLoop->getHeader())
818 << "Only outer loops with induction or reduction PHI nodes can be"
819 " interchanged currently.";
820 });
821 return true;
822 }
823
824 Inductions.clear();
825 if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
826 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
827 dbgs() << "Only inner loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
828 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
829 ORE->emit([&]() {
830 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner",
831 InnerLoop->getStartLoc(),
832 InnerLoop->getHeader())
833 << "Only inner loops with induction or reduction PHI nodes can be"
834 " interchange currently.";
835 });
836 return true;
837 }
838
839 // TODO: Triangular loops are not handled for now.
840 if (!isLoopStructureUnderstood()) {
841 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop structure not understood by pass\n"
; } } while (false)
;
842 ORE->emit([&]() {
843 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner",
844 InnerLoop->getStartLoc(),
845 InnerLoop->getHeader())
846 << "Inner loop structure not understood currently.";
847 });
848 return true;
849 }
850
851 return false;
852}
853
854bool LoopInterchangeLegality::findInductions(
855 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
856 for (PHINode &PHI : L->getHeader()->phis()) {
857 InductionDescriptor ID;
858 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
859 Inductions.push_back(&PHI);
860 }
861 return !Inductions.empty();
862}
863
864// We currently only support LCSSA PHI nodes in the inner loop exit, if their
865// users are either reduction PHIs or PHIs outside the outer loop (which means
866// the we are only interested in the final value after the loop).
867static bool
868areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
869 SmallPtrSetImpl<PHINode *> &Reductions) {
870 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
871 for (PHINode &PHI : InnerExit->phis()) {
872 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
873 if (PHI.getNumIncomingValues() > 1)
874 return false;
875 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
876 PHINode *PN = dyn_cast<PHINode>(U);
877 return !PN ||
878 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
879 })) {
880 return false;
881 }
882 }
883 return true;
884}
885
886// We currently support LCSSA PHI nodes in the outer loop exit, if their
887// incoming values do not come from the outer loop latch or if the
888// outer loop latch has a single predecessor. In that case, the value will
889// be available if both the inner and outer loop conditions are true, which
890// will still be true after interchanging. If we have multiple predecessor,
891// that may not be the case, e.g. because the outer loop latch may be executed
892// if the inner loop is not executed.
893static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
894 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
895 for (PHINode &PHI : LoopNestExit->phis()) {
896 // FIXME: We currently are not able to detect floating point reductions
897 // and have to use floating point PHIs as a proxy to prevent
898 // interchanging in the presence of floating point reductions.
899 if (PHI.getType()->isFloatingPointTy())
900 return false;
901 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
902 Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
903 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
904 continue;
905
906 // The incoming value is defined in the outer loop latch. Currently we
907 // only support that in case the outer loop latch has a single predecessor.
908 // This guarantees that the outer loop latch is executed if and only if
909 // the inner loop is executed (because tightlyNested() guarantees that the
910 // outer loop header only branches to the inner loop or the outer loop
911 // latch).
912 // FIXME: We could weaken this logic and allow multiple predecessors,
913 // if the values are produced outside the loop latch. We would need
914 // additional logic to update the PHI nodes in the exit block as
915 // well.
916 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
917 return false;
918 }
919 }
920 return true;
921}
922
923// In case of multi-level nested loops, it may occur that lcssa phis exist in
924// the latch of InnerLoop, i.e., when defs of the incoming values are further
925// inside the loopnest. Sometimes those incoming values are not available
926// after interchange, since the original inner latch will become the new outer
927// latch which may have predecessor paths that do not include those incoming
928// values.
929// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
930// multi-level loop nests.
931static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
932 if (InnerLoop->getSubLoops().empty())
933 return true;
934 // If the original outer latch has only one predecessor, then values defined
935 // further inside the looploop, e.g., in the innermost loop, will be available
936 // at the new outer latch after interchange.
937 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
938 return true;
939
940 // The outer latch has more than one predecessors, i.e., the inner
941 // exit and the inner header.
942 // PHI nodes in the inner latch are lcssa phis where the incoming values
943 // are defined further inside the loopnest. Check if those phis are used
944 // in the original inner latch. If that is the case then bail out since
945 // those incoming values may not be available at the new outer latch.
946 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
947 for (PHINode &PHI : InnerLoopLatch->phis()) {
948 for (auto *U : PHI.users()) {
949 Instruction *UI = cast<Instruction>(U);
950 if (InnerLoopLatch == UI->getParent())
951 return false;
952 }
953 }
954 return true;
955}
956
957bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
958 unsigned OuterLoopId,
959 CharMatrix &DepMatrix) {
960 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
961 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
962 << " and OuterLoopId = " << OuterLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
963 << " due to dependence\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
;
964 ORE->emit([&]() {
965 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence",
966 InnerLoop->getStartLoc(),
967 InnerLoop->getHeader())
968 << "Cannot interchange loops due to dependences.";
969 });
970 return false;
971 }
972 // Check if outer and inner loop contain legal instructions only.
973 for (auto *BB : OuterLoop->blocks())
974 for (Instruction &I : BB->instructionsWithoutDebug())
975 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
976 // readnone functions do not prevent interchanging.
977 if (CI->onlyWritesMemory())
978 continue;
979 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
980 dbgs() << "Loops with call instructions cannot be interchanged "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
981 << "safely.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
;
982 ORE->emit([&]() {
983 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "CallInst",
984 CI->getDebugLoc(),
985 CI->getParent())
986 << "Cannot interchange loops due to call instruction.";
987 });
988
989 return false;
990 }
991
992 if (!findInductions(InnerLoop, InnerLoopInductions)) {
993 LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cound not find inner loop induction variables.\n"
; } } while (false)
;
994 return false;
995 }
996
997 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
998 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"
; } } while (false)
;
999 ORE->emit([&]() {
1000 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedInnerLatchPHI",
1001 InnerLoop->getStartLoc(),
1002 InnerLoop->getHeader())
1003 << "Cannot interchange loops because unsupported PHI nodes found "
1004 "in inner loop latch.";
1005 });
1006 return false;
1007 }
1008
1009 // TODO: The loops could not be interchanged due to current limitations in the
1010 // transform module.
1011 if (currentLimitations()) {
1012 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not legal because of current transform limitation\n"
; } } while (false)
;
1013 return false;
1014 }
1015
1016 // Check if the loops are tightly nested.
1017 if (!tightlyNested(OuterLoop, InnerLoop)) {
1018 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops not tightly nested\n"
; } } while (false)
;
1019 ORE->emit([&]() {
1020 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested",
1021 InnerLoop->getStartLoc(),
1022 InnerLoop->getHeader())
1023 << "Cannot interchange loops because they are not tightly "
1024 "nested.";
1025 });
1026 return false;
1027 }
1028
1029 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1030 OuterInnerReductions)) {
1031 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"
; } } while (false)
;
1032 ORE->emit([&]() {
1033 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1034 InnerLoop->getStartLoc(),
1035 InnerLoop->getHeader())
1036 << "Found unsupported PHI node in loop exit.";
1037 });
1038 return false;
1039 }
1040
1041 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1042 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"
; } } while (false)
;
1043 ORE->emit([&]() {
1044 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1045 OuterLoop->getStartLoc(),
1046 OuterLoop->getHeader())
1047 << "Found unsupported PHI node in loop exit.";
1048 });
1049 return false;
1050 }
1051
1052 return true;
1053}
1054
1055int LoopInterchangeProfitability::getInstrOrderCost() {
1056 unsigned GoodOrder, BadOrder;
1057 BadOrder = GoodOrder = 0;
1058 for (BasicBlock *BB : InnerLoop->blocks()) {
1059 for (Instruction &Ins : *BB) {
1060 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1061 unsigned NumOp = GEP->getNumOperands();
1062 bool FoundInnerInduction = false;
1063 bool FoundOuterInduction = false;
1064 for (unsigned i = 0; i < NumOp; ++i) {
1065 // Skip operands that are not SCEV-able.
1066 if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1067 continue;
1068
1069 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1070 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1071 if (!AR)
1072 continue;
1073
1074 // If we find the inner induction after an outer induction e.g.
1075 // for(int i=0;i<N;i++)
1076 // for(int j=0;j<N;j++)
1077 // A[i][j] = A[i-1][j-1]+k;
1078 // then it is a good order.
1079 if (AR->getLoop() == InnerLoop) {
1080 // We found an InnerLoop induction after OuterLoop induction. It is
1081 // a good order.
1082 FoundInnerInduction = true;
1083 if (FoundOuterInduction) {
1084 GoodOrder++;
1085 break;
1086 }
1087 }
1088 // If we find the outer induction after an inner induction e.g.
1089 // for(int i=0;i<N;i++)
1090 // for(int j=0;j<N;j++)
1091 // A[j][i] = A[j-1][i-1]+k;
1092 // then it is a bad order.
1093 if (AR->getLoop() == OuterLoop) {
1094 // We found an OuterLoop induction after InnerLoop induction. It is
1095 // a bad order.
1096 FoundOuterInduction = true;
1097 if (FoundInnerInduction) {
1098 BadOrder++;
1099 break;
1100 }
1101 }
1102 }
1103 }
1104 }
1105 }
1106 return GoodOrder - BadOrder;
1107}
1108
1109static bool isProfitableForVectorization(unsigned InnerLoopId,
1110 unsigned OuterLoopId,
1111 CharMatrix &DepMatrix) {
1112 // TODO: Improve this heuristic to catch more cases.
1113 // If the inner loop is loop independent or doesn't carry any dependency it is
1114 // profitable to move this to outer position.
1115 for (auto &Row : DepMatrix) {
1116 if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1117 return false;
1118 // TODO: We need to improve this heuristic.
1119 if (Row[OuterLoopId] != '=')
1120 return false;
1121 }
1122 // If outer loop has dependence and inner loop is loop independent then it is
1123 // profitable to interchange to enable parallelism.
1124 // If there are no dependences, interchanging will not improve anything.
1125 return !DepMatrix.empty();
1126}
1127
1128bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1129 unsigned OuterLoopId,
1130 CharMatrix &DepMatrix) {
1131 // TODO: Add better profitability checks.
1132 // e.g
1133 // 1) Construct dependency matrix and move the one with no loop carried dep
1134 // inside to enable vectorization.
1135
1136 // This is rough cost estimation algorithm. It counts the good and bad order
1137 // of induction variables in the instruction and allows reordering if number
1138 // of bad orders is more than good.
1139 int Cost = getInstrOrderCost();
1140 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cost = " << Cost
<< "\n"; } } while (false)
;
1141 if (Cost < -LoopInterchangeCostThreshold)
1142 return true;
1143
1144 // It is not profitable as per current cache profitability model. But check if
1145 // we can move this loop outside to improve parallelism.
1146 if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1147 return true;
1148
1149 ORE->emit([&]() {
1150 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable",
1151 InnerLoop->getStartLoc(),
1152 InnerLoop->getHeader())
1153 << "Interchanging loops is too costly (cost="
1154 << ore::NV("Cost", Cost) << ", threshold="
1155 << ore::NV("Threshold", LoopInterchangeCostThreshold)
1156 << ") and it does not improve parallelism.";
1157 });
1158 return false;
1159}
1160
1161void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1162 Loop *InnerLoop) {
1163 for (Loop *L : *OuterLoop)
1164 if (L == InnerLoop) {
1165 OuterLoop->removeChildLoop(L);
1166 return;
1167 }
1168 llvm_unreachable("Couldn't find loop")::llvm::llvm_unreachable_internal("Couldn't find loop", "llvm/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1168)
;
1169}
1170
1171/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1172/// new inner and outer loop after interchanging: NewInner is the original
1173/// outer loop and NewOuter is the original inner loop.
1174///
1175/// Before interchanging, we have the following structure
1176/// Outer preheader
1177// Outer header
1178// Inner preheader
1179// Inner header
1180// Inner body
1181// Inner latch
1182// outer bbs
1183// Outer latch
1184//
1185// After interchanging:
1186// Inner preheader
1187// Inner header
1188// Outer preheader
1189// Outer header
1190// Inner body
1191// outer bbs
1192// Outer latch
1193// Inner latch
1194void LoopInterchangeTransform::restructureLoops(
1195 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1196 BasicBlock *OrigOuterPreHeader) {
1197 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1198 // The original inner loop preheader moves from the new inner loop to
1199 // the parent loop, if there is one.
1200 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1201 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1202
1203 // Switch the loop levels.
1204 if (OuterLoopParent) {
1205 // Remove the loop from its parent loop.
1206 removeChildLoop(OuterLoopParent, NewInner);
1207 removeChildLoop(NewInner, NewOuter);
1208 OuterLoopParent->addChildLoop(NewOuter);
1209 } else {
1210 removeChildLoop(NewInner, NewOuter);
1211 LI->changeTopLevelLoop(NewInner, NewOuter);
1212 }
1213 while (!NewOuter->isInnermost())
1214 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1215 NewOuter->addChildLoop(NewInner);
1216
1217 // BBs from the original inner loop.
1218 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1219
1220 // Add BBs from the original outer loop to the original inner loop (excluding
1221 // BBs already in inner loop)
1222 for (BasicBlock *BB : NewInner->blocks())
1223 if (LI->getLoopFor(BB) == NewInner)
1224 NewOuter->addBlockEntry(BB);
1225
1226 // Now remove inner loop header and latch from the new inner loop and move
1227 // other BBs (the loop body) to the new inner loop.
1228 BasicBlock *OuterHeader = NewOuter->getHeader();
1229 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1230 for (BasicBlock *BB : OrigInnerBBs) {
1231 // Nothing will change for BBs in child loops.
1232 if (LI->getLoopFor(BB) != NewOuter)
1233 continue;
1234 // Remove the new outer loop header and latch from the new inner loop.
1235 if (BB == OuterHeader || BB == OuterLatch)
1236 NewInner->removeBlockFromLoop(BB);
1237 else
1238 LI->changeLoopFor(BB, NewInner);
1239 }
1240
1241 // The preheader of the original outer loop becomes part of the new
1242 // outer loop.
1243 NewOuter->addBlockEntry(OrigOuterPreHeader);
1244 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1245
1246 // Tell SE that we move the loops around.
1247 SE->forgetLoop(NewOuter);
1248 SE->forgetLoop(NewInner);
1249}
1250
1251bool LoopInterchangeTransform::transform() {
1252 bool Transformed = false;
1253
1254 if (InnerLoop->getSubLoops().empty()) {
1255 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1256 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Splitting the inner loop latch\n"
; } } while (false)
;
1257 auto &InductionPHIs = LIL.getInnerLoopInductions();
1258 if (InductionPHIs.empty()) {
1259 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to find the point to split loop latch \n"
; } } while (false)
;
1260 return false;
1261 }
1262
1263 SmallVector<Instruction *, 8> InnerIndexVarList;
1264 for (PHINode *CurInductionPHI : InductionPHIs) {
1265 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1266 InnerIndexVarList.push_back(
1267 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1268 else
1269 InnerIndexVarList.push_back(
1270 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1271 }
1272
1273 // Create a new latch block for the inner loop. We split at the
1274 // current latch's terminator and then move the condition and all
1275 // operands that are not either loop-invariant or the induction PHI into the
1276 // new latch block.
1277 BasicBlock *NewLatch =
1278 SplitBlock(InnerLoop->getLoopLatch(),
1279 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1280
1281 SmallSetVector<Instruction *, 4> WorkList;
1282 unsigned i = 0;
1283 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1284 for (; i < WorkList.size(); i++) {
1285 // Duplicate instruction and move it the new latch. Update uses that
1286 // have been moved.
1287 Instruction *NewI = WorkList[i]->clone();
1288 NewI->insertBefore(NewLatch->getFirstNonPHI());
1289 assert(!NewI->mayHaveSideEffects() &&(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1291, __extension__
__PRETTY_FUNCTION__))
1290 "Moving instructions with side-effects may change behavior of "(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1291, __extension__
__PRETTY_FUNCTION__))
1291 "the loop nest!")(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1291, __extension__
__PRETTY_FUNCTION__))
;
1292 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1293 Instruction *UserI = cast<Instruction>(U.getUser());
1294 if (!InnerLoop->contains(UserI->getParent()) ||
1295 UserI->getParent() == NewLatch ||
1296 llvm::is_contained(InductionPHIs, UserI))
1297 U.set(NewI);
1298 }
1299 // Add operands of moved instruction to the worklist, except if they are
1300 // outside the inner loop or are the induction PHI.
1301 for (Value *Op : WorkList[i]->operands()) {
1302 Instruction *OpI = dyn_cast<Instruction>(Op);
1303 if (!OpI ||
1304 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1305 llvm::is_contained(InductionPHIs, OpI))
1306 continue;
1307 WorkList.insert(OpI);
1308 }
1309 }
1310 };
1311
1312 // FIXME: Should we interchange when we have a constant condition?
1313 Instruction *CondI = dyn_cast<Instruction>(
1314 cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1315 ->getCondition());
1316 if (CondI)
1317 WorkList.insert(CondI);
1318 MoveInstructions();
1319 for (Instruction *InnerIndexVar : InnerIndexVarList)
1320 WorkList.insert(cast<Instruction>(InnerIndexVar));
1321 MoveInstructions();
1322
1323 // Splits the inner loops phi nodes out into a separate basic block.
1324 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1325 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1326 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "splitting InnerLoopHeader done\n"
; } } while (false)
;
1327 }
1328
1329 // Instructions in the original inner loop preheader may depend on values
1330 // defined in the outer loop header. Move them there, because the original
1331 // inner loop preheader will become the entry into the interchanged loop nest.
1332 // Currently we move all instructions and rely on LICM to move invariant
1333 // instructions outside the loop nest.
1334 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1335 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1336 if (InnerLoopPreHeader != OuterLoopHeader) {
1337 SmallPtrSet<Instruction *, 4> NeedsMoving;
1338 for (Instruction &I :
1339 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1340 std::prev(InnerLoopPreHeader->end()))))
1341 I.moveBefore(OuterLoopHeader->getTerminator());
1342 }
1343
1344 Transformed |= adjustLoopLinks();
1345 if (!Transformed) {
1346 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopLinks failed\n"
; } } while (false)
;
1347 return false;
1348 }
1349
1350 return true;
1351}
1352
1353/// \brief Move all instructions except the terminator from FromBB right before
1354/// InsertBefore
1355static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1356 auto &ToList = InsertBefore->getParent()->getInstList();
1357 auto &FromList = FromBB->getInstList();
1358
1359 ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1360 FromBB->getTerminator()->getIterator());
1361}
1362
1363/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1364static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1365 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1366 // from BB1 afterwards.
1367 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1368 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1369 for (Instruction *I : TempInstrs)
1370 I->removeFromParent();
1371
1372 // Move instructions from BB2 to BB1.
1373 moveBBContents(BB2, BB1->getTerminator());
1374
1375 // Move instructions from TempInstrs to BB2.
1376 for (Instruction *I : TempInstrs)
1377 I->insertBefore(BB2->getTerminator());
1378}
1379
1380// Update BI to jump to NewBB instead of OldBB. Records updates to the
1381// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1382// \p OldBB is exactly once in BI's successor list.
1383static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1384 BasicBlock *NewBB,
1385 std::vector<DominatorTree::UpdateType> &DTUpdates,
1386 bool MustUpdateOnce = true) {
1387 assert((!MustUpdateOnce ||(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1391, __extension__
__PRETTY_FUNCTION__))
1388 llvm::count_if(successors(BI),(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1391, __extension__
__PRETTY_FUNCTION__))
1389 [OldBB](BasicBlock *BB) {(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1391, __extension__
__PRETTY_FUNCTION__))
1390 return BB == OldBB;(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1391, __extension__
__PRETTY_FUNCTION__))
1391 }) == 1) && "BI must jump to OldBB exactly once.")(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1391, __extension__
__PRETTY_FUNCTION__))
;
1392 bool Changed = false;
1393 for (Use &Op : BI->operands())
1394 if (Op == OldBB) {
1395 Op.set(NewBB);
1396 Changed = true;
1397 }
1398
1399 if (Changed) {
1400 DTUpdates.push_back(
1401 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1402 DTUpdates.push_back(
1403 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1404 }
1405 assert(Changed && "Expected a successor to be updated")(static_cast <bool> (Changed && "Expected a successor to be updated"
) ? void (0) : __assert_fail ("Changed && \"Expected a successor to be updated\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1405, __extension__
__PRETTY_FUNCTION__))
;
1406}
1407
1408// Move Lcssa PHIs to the right place.
1409static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1410 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1411 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1412 Loop *InnerLoop, LoopInfo *LI) {
1413
1414 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1415 // defined either in the header or latch. Those blocks will become header and
1416 // latch of the new outer loop, and the only possible users can PHI nodes
1417 // in the exit block of the loop nest or the outer loop header (reduction
1418 // PHIs, in that case, the incoming value must be defined in the inner loop
1419 // header). We can just substitute the user with the incoming value and remove
1420 // the PHI.
1421 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1422 assert(P.getNumIncomingValues() == 1 &&(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1423, __extension__
__PRETTY_FUNCTION__))
1423 "Only loops with a single exit are supported!")(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1423, __extension__
__PRETTY_FUNCTION__))
;
1424
1425 // Incoming values are guaranteed be instructions currently.
1426 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1427 // Skip phis with incoming values from the inner loop body, excluding the
1428 // header and latch.
1429 if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
1430 continue;
1431
1432 assert(all_of(P.users(),(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1440, __extension__
__PRETTY_FUNCTION__))
1433 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1440, __extension__
__PRETTY_FUNCTION__))
1434 return (cast<PHINode>(U)->getParent() == OuterHeader &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1440, __extension__
__PRETTY_FUNCTION__))
1435 IncI->getParent() == InnerHeader) ||(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1440, __extension__
__PRETTY_FUNCTION__))
1436 cast<PHINode>(U)->getParent() == OuterExit;(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1440, __extension__
__PRETTY_FUNCTION__))
1437 }) &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1440, __extension__
__PRETTY_FUNCTION__))
1438 "Can only replace phis iff the uses are in the loop nest exit or "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1440, __extension__
__PRETTY_FUNCTION__))
1439 "the incoming value is defined in the inner header (it will "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1440, __extension__
__PRETTY_FUNCTION__))
1440 "dominate all loop blocks after interchanging)")(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1440, __extension__
__PRETTY_FUNCTION__))
;
1441 P.replaceAllUsesWith(IncI);
1442 P.eraseFromParent();
1443 }
1444
1445 SmallVector<PHINode *, 8> LcssaInnerExit;
1446 for (PHINode &P : InnerExit->phis())
1447 LcssaInnerExit.push_back(&P);
1448
1449 SmallVector<PHINode *, 8> LcssaInnerLatch;
1450 for (PHINode &P : InnerLatch->phis())
1451 LcssaInnerLatch.push_back(&P);
1452
1453 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1454 // If a PHI node has users outside of InnerExit, it has a use outside the
1455 // interchanged loop and we have to preserve it. We move these to
1456 // InnerLatch, which will become the new exit block for the innermost
1457 // loop after interchanging.
1458 for (PHINode *P : LcssaInnerExit)
1
Assuming '__begin1' is equal to '__end1'
1459 P->moveBefore(InnerLatch->getFirstNonPHI());
1460
1461 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1462 // and we have to move them to the new inner latch.
1463 for (PHINode *P : LcssaInnerLatch)
2
Assuming '__begin1' is equal to '__end1'
1464 P->moveBefore(InnerExit->getFirstNonPHI());
1465
1466 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1467 // incoming values defined in the outer loop, we have to add a new PHI
1468 // in the inner loop latch, which became the exit block of the outer loop,
1469 // after interchanging.
1470 if (OuterExit) {
3
Assuming 'OuterExit' is non-null
4
Taking true branch
1471 for (PHINode &P : OuterExit->phis()) {
1472 if (P.getNumIncomingValues() != 1)
5
Assuming the condition is false
6
Taking false branch
1473 continue;
1474 // Skip Phis with incoming values defined in the inner loop. Those should
1475 // already have been updated.
1476 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1477 if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
7
Assuming 'I' is non-null
8
Assuming the condition is false
9
Taking false branch
1478 continue;
1479
1480 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
10
Assuming the object is not a 'PHINode'
11
'NewPhi' initialized to a null pointer value
1481 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
12
Called C++ object pointer is null
1482 NewPhi->setIncomingBlock(0, OuterLatch);
1483 // We might have incoming edges from other BBs, i.e., the original outer
1484 // header.
1485 for (auto *Pred : predecessors(InnerLatch)) {
1486 if (Pred == OuterLatch)
1487 continue;
1488 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1489 }
1490 NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1491 P.setIncomingValue(0, NewPhi);
1492 }
1493 }
1494
1495 // Now adjust the incoming blocks for the LCSSA PHIs.
1496 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1497 // with the new latch.
1498 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1499}
1500
1501bool LoopInterchangeTransform::adjustLoopBranches() {
1502 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopBranches called\n"
; } } while (false)
;
1503 std::vector<DominatorTree::UpdateType> DTUpdates;
1504
1505 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1506 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1507
1508 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1510, __extension__
__PRETTY_FUNCTION__))
1509 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1510, __extension__
__PRETTY_FUNCTION__))
1510 InnerLoopPreHeader && "Guaranteed by loop-simplify form")(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1510, __extension__
__PRETTY_FUNCTION__))
;
1511 // Ensure that both preheaders do not contain PHI nodes and have single
1512 // predecessors. This allows us to move them easily. We use
1513 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1514 // preheaders do not satisfy those conditions.
1515 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1516 !OuterLoopPreHeader->getUniquePredecessor())
1517 OuterLoopPreHeader =
1518 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1519 if (InnerLoopPreHeader == OuterLoop->getHeader())
1520 InnerLoopPreHeader =
1521 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1522
1523 // Adjust the loop preheader
1524 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1525 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1526 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1527 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1528 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1529 BasicBlock *InnerLoopLatchPredecessor =
1530 InnerLoopLatch->getUniquePredecessor();
1531 BasicBlock *InnerLoopLatchSuccessor;
1532 BasicBlock *OuterLoopLatchSuccessor;
1533
1534 BranchInst *OuterLoopLatchBI =
1535 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1536 BranchInst *InnerLoopLatchBI =
1537 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1538 BranchInst *OuterLoopHeaderBI =
1539 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1540 BranchInst *InnerLoopHeaderBI =
1541 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1542
1543 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1544 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1545 !InnerLoopHeaderBI)
1546 return false;
1547
1548 BranchInst *InnerLoopLatchPredecessorBI =
1549 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1550 BranchInst *OuterLoopPredecessorBI =
1551 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1552
1553 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1554 return false;
1555 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1556 if (!InnerLoopHeaderSuccessor)
1557 return false;
1558
1559 // Adjust Loop Preheader and headers.
1560 // The branches in the outer loop predecessor and the outer loop header can
1561 // be unconditional branches or conditional branches with duplicates. Consider
1562 // this when updating the successors.
1563 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1564 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1565 // The outer loop header might or might not branch to the outer latch.
1566 // We are guaranteed to branch to the inner loop preheader.
1567 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1568 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1569 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1570 DTUpdates,
1571 /*MustUpdateOnce=*/false);
1572 }
1573 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1574 InnerLoopHeaderSuccessor, DTUpdates,
1575 /*MustUpdateOnce=*/false);
1576
1577 // Adjust reduction PHI's now that the incoming block has changed.
1578 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1579 OuterLoopHeader);
1580
1581 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1582 OuterLoopPreHeader, DTUpdates);
1583
1584 // -------------Adjust loop latches-----------
1585 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1586 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1587 else
1588 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1589
1590 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1591 InnerLoopLatchSuccessor, DTUpdates);
1592
1593 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1594 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1595 else
1596 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1597
1598 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1599 OuterLoopLatchSuccessor, DTUpdates);
1600 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1601 DTUpdates);
1602
1603 DT->applyUpdates(DTUpdates);
1604 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1605 OuterLoopPreHeader);
1606
1607 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1608 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1609 InnerLoop, LI);
1610 // For PHIs in the exit block of the outer loop, outer's latch has been
1611 // replaced by Inners'.
1612 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1613
1614 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1615 // Now update the reduction PHIs in the inner and outer loop headers.
1616 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1617 for (PHINode &PHI : InnerLoopHeader->phis())
1618 if (OuterInnerReductions.contains(&PHI))
1619 InnerLoopPHIs.push_back(&PHI);
1620
1621 for (PHINode &PHI : OuterLoopHeader->phis())
1622 if (OuterInnerReductions.contains(&PHI))
1623 OuterLoopPHIs.push_back(&PHI);
1624
1625 // Now move the remaining reduction PHIs from outer to inner loop header and
1626 // vice versa. The PHI nodes must be part of a reduction across the inner and
1627 // outer loop and all the remains to do is and updating the incoming blocks.
1628 for (PHINode *PHI : OuterLoopPHIs) {
1629 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Outer loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1630 PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1631 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1631, __extension__
__PRETTY_FUNCTION__))
;
1632 }
1633 for (PHINode *PHI : InnerLoopPHIs) {
1634 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1635 PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1636 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1636, __extension__
__PRETTY_FUNCTION__))
;
1637 }
1638
1639 // Update the incoming blocks for moved PHI nodes.
1640 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1641 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1642 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1643 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1644
1645 // Values defined in the outer loop header could be used in the inner loop
1646 // latch. In that case, we need to create LCSSA phis for them, because after
1647 // interchanging they will be defined in the new inner loop and used in the
1648 // new outer loop.
1649 IRBuilder<> Builder(OuterLoopHeader->getContext());
1650 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1651 for (Instruction &I :
1652 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1653 MayNeedLCSSAPhis.push_back(&I);
1654 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1655
1656 return true;
1657}
1658
1659bool LoopInterchangeTransform::adjustLoopLinks() {
1660 // Adjust all branches in the inner and outer loop.
1661 bool Changed = adjustLoopBranches();
1662 if (Changed) {
1663 // We have interchanged the preheaders so we need to interchange the data in
1664 // the preheaders as well. This is because the content of the inner
1665 // preheader was previously executed inside the outer loop.
1666 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1667 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1668 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1669 }
1670 return Changed;
1671}
1672
1673namespace {
1674/// Main LoopInterchange Pass.
1675struct LoopInterchangeLegacyPass : public LoopPass {
1676 static char ID;
1677
1678 LoopInterchangeLegacyPass() : LoopPass(ID) {
1679 initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
1680 }
1681
1682 void getAnalysisUsage(AnalysisUsage &AU) const override {
1683 AU.addRequired<DependenceAnalysisWrapperPass>();
1684 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1685
1686 getLoopAnalysisUsage(AU);
1687 }
1688
1689 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1690 if (skipLoop(L))
1691 return false;
1692
1693 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1694 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1695 auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1696 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1697 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1698
1699 return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
1700 }
1701};
1702} // namespace
1703
1704char LoopInterchangeLegacyPass::ID = 0;
1705
1706INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1707 "Interchanges loops for cache reuse", false, false)static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1708INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
1709INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)initializeDependenceAnalysisWrapperPassPass(Registry);
1710INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)initializeOptimizationRemarkEmitterWrapperPassPass(Registry);
1711
1712INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1713 "Interchanges loops for cache reuse", false, false)PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1714
1715Pass *llvm::createLoopInterchangePass() {
1716 return new LoopInterchangeLegacyPass();
1717}
1718
1719PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1720 LoopAnalysisManager &AM,
1721 LoopStandardAnalysisResults &AR,
1722 LPMUpdater &U) {
1723 Function &F = *LN.getParent();
1724
1725 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1726 OptimizationRemarkEmitter ORE(&F);
1727 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
1728 return PreservedAnalyses::all();
1729 return getLoopPassPreservedAnalyses();
1730}