Bug Summary

File:build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Warning:line 665, column 8
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LoopInterchange.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Transforms/Scalar -I include -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-04-20-140412-16051-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Scalar/LoopInterchange.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Analysis/DependenceAnalysis.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/LoopNestAnalysis.h"
23#include "llvm/Analysis/LoopPass.h"
24#include "llvm/Analysis/OptimizationRemarkEmitter.h"
25#include "llvm/Analysis/ScalarEvolution.h"
26#include "llvm/Analysis/ScalarEvolutionExpressions.h"
27#include "llvm/IR/BasicBlock.h"
28#include "llvm/IR/Constants.h"
29#include "llvm/IR/DiagnosticInfo.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/IRBuilder.h"
33#include "llvm/IR/InstrTypes.h"
34#include "llvm/IR/Instruction.h"
35#include "llvm/IR/Instructions.h"
36#include "llvm/IR/User.h"
37#include "llvm/IR/Value.h"
38#include "llvm/InitializePasses.h"
39#include "llvm/Pass.h"
40#include "llvm/Support/Casting.h"
41#include "llvm/Support/CommandLine.h"
42#include "llvm/Support/Debug.h"
43#include "llvm/Support/ErrorHandling.h"
44#include "llvm/Support/raw_ostream.h"
45#include "llvm/Transforms/Scalar.h"
46#include "llvm/Transforms/Utils/BasicBlockUtils.h"
47#include "llvm/Transforms/Utils/LoopUtils.h"
48#include <cassert>
49#include <utility>
50#include <vector>
51
52using namespace llvm;
53
54#define DEBUG_TYPE"loop-interchange" "loop-interchange"
55
56STATISTIC(LoopsInterchanged, "Number of loops interchanged")static llvm::Statistic LoopsInterchanged = {"loop-interchange"
, "LoopsInterchanged", "Number of loops interchanged"}
;
57
58static cl::opt<int> LoopInterchangeCostThreshold(
59 "loop-interchange-threshold", cl::init(0), cl::Hidden,
60 cl::desc("Interchange if you gain more than this number"));
61
62namespace {
63
64using LoopVector = SmallVector<Loop *, 8>;
65
66// TODO: Check if we can use a sparse matrix here.
67using CharMatrix = std::vector<std::vector<char>>;
68
69} // end anonymous namespace
70
71// Maximum number of dependencies that can be handled in the dependency matrix.
72static const unsigned MaxMemInstrCount = 100;
73
74// Maximum loop depth supported.
75static const unsigned MaxLoopNestDepth = 10;
76
77#ifdef DUMP_DEP_MATRICIES
78static void printDepMatrix(CharMatrix &DepMatrix) {
79 for (auto &Row : DepMatrix) {
80 for (auto D : Row)
81 LLVM_DEBUG(dbgs() << D << " ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << D << " "; } } while
(false)
;
82 LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "\n"; } } while (false
)
;
83 }
84}
85#endif
86
87static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
88 Loop *L, DependenceInfo *DI) {
89 using ValueVector = SmallVector<Value *, 16>;
90
91 ValueVector MemInstr;
92
93 // For each block.
94 for (BasicBlock *BB : L->blocks()) {
95 // Scan the BB and collect legal loads and stores.
96 for (Instruction &I : *BB) {
97 if (!isa<Instruction>(I))
98 return false;
99 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
100 if (!Ld->isSimple())
101 return false;
102 MemInstr.push_back(&I);
103 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
104 if (!St->isSimple())
105 return false;
106 MemInstr.push_back(&I);
107 }
108 }
109 }
110
111 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
112 << " Loads and Stores to analyze\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
;
113
114 ValueVector::iterator I, IE, J, JE;
115
116 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
117 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
118 std::vector<char> Dep;
119 Instruction *Src = cast<Instruction>(*I);
120 Instruction *Dst = cast<Instruction>(*J);
121 // Ignore Input dependencies.
122 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
123 continue;
124 // Track Output, Flow, and Anti dependencies.
125 if (auto D = DI->depends(Src, Dst, true)) {
126 assert(D->isOrdered() && "Expected an output, flow or anti dep.")(static_cast <bool> (D->isOrdered() && "Expected an output, flow or anti dep."
) ? void (0) : __assert_fail ("D->isOrdered() && \"Expected an output, flow or anti dep.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 126, __extension__
__PRETTY_FUNCTION__))
;
127 LLVM_DEBUG(StringRef DepType =do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
128 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
129 dbgs() << "Found " << DepTypedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
130 << " dependency between Src and Dst\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
131 << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
;
132 unsigned Levels = D->getLevels();
133 char Direction;
134 for (unsigned II = 1; II <= Levels; ++II) {
135 const SCEV *Distance = D->getDistance(II);
136 const SCEVConstant *SCEVConst =
137 dyn_cast_or_null<SCEVConstant>(Distance);
138 if (SCEVConst) {
139 const ConstantInt *CI = SCEVConst->getValue();
140 if (CI->isNegative())
141 Direction = '<';
142 else if (CI->isZero())
143 Direction = '=';
144 else
145 Direction = '>';
146 Dep.push_back(Direction);
147 } else if (D->isScalar(II)) {
148 Direction = 'S';
149 Dep.push_back(Direction);
150 } else {
151 unsigned Dir = D->getDirection(II);
152 if (Dir == Dependence::DVEntry::LT ||
153 Dir == Dependence::DVEntry::LE)
154 Direction = '<';
155 else if (Dir == Dependence::DVEntry::GT ||
156 Dir == Dependence::DVEntry::GE)
157 Direction = '>';
158 else if (Dir == Dependence::DVEntry::EQ)
159 Direction = '=';
160 else
161 Direction = '*';
162 Dep.push_back(Direction);
163 }
164 }
165 while (Dep.size() != Level) {
166 Dep.push_back('I');
167 }
168
169 DepMatrix.push_back(Dep);
170 if (DepMatrix.size() > MaxMemInstrCount) {
171 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
172 << " dependencies inside loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
;
173 return false;
174 }
175 }
176 }
177 }
178
179 return true;
180}
181
182// A loop is moved from index 'from' to an index 'to'. Update the Dependence
183// matrix by exchanging the two columns.
184static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
185 unsigned ToIndx) {
186 for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
187 std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
188}
189
190// Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
191// '>'
192static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
193 unsigned Column) {
194 for (unsigned i = 0; i <= Column; ++i) {
195 if (DepMatrix[Row][i] == '<')
196 return false;
197 if (DepMatrix[Row][i] == '>')
198 return true;
199 }
200 // All dependencies were '=','S' or 'I'
201 return false;
202}
203
204// Checks if no dependence exist in the dependency matrix in Row before Column.
205static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
206 unsigned Column) {
207 for (unsigned i = 0; i < Column; ++i) {
208 if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
209 DepMatrix[Row][i] != 'I')
210 return false;
211 }
212 return true;
213}
214
215static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
216 unsigned OuterLoopId, char InnerDep,
217 char OuterDep) {
218 if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
219 return false;
220
221 if (InnerDep == OuterDep)
222 return true;
223
224 // It is legal to interchange if and only if after interchange no row has a
225 // '>' direction as the leftmost non-'='.
226
227 if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
228 return true;
229
230 if (InnerDep == '<')
231 return true;
232
233 if (InnerDep == '>') {
234 // If OuterLoopId represents outermost loop then interchanging will make the
235 // 1st dependency as '>'
236 if (OuterLoopId == 0)
237 return false;
238
239 // If all dependencies before OuterloopId are '=','S'or 'I'. Then
240 // interchanging will result in this row having an outermost non '='
241 // dependency of '>'
242 if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
243 return true;
244 }
245
246 return false;
247}
248
249// Checks if it is legal to interchange 2 loops.
250// [Theorem] A permutation of the loops in a perfect nest is legal if and only
251// if the direction matrix, after the same permutation is applied to its
252// columns, has no ">" direction as the leftmost non-"=" direction in any row.
253static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
254 unsigned InnerLoopId,
255 unsigned OuterLoopId) {
256 unsigned NumRows = DepMatrix.size();
257 // For each row check if it is valid to interchange.
258 for (unsigned Row = 0; Row < NumRows; ++Row) {
259 char InnerDep = DepMatrix[Row][InnerLoopId];
260 char OuterDep = DepMatrix[Row][OuterLoopId];
261 if (InnerDep == '*' || OuterDep == '*')
262 return false;
263 if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
264 return false;
265 }
266 return true;
267}
268
269static void populateWorklist(Loop &L, LoopVector &LoopList) {
270 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
271 << L.getHeader()->getParent()->getName() << " Loop: %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
272 << L.getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
;
273 assert(LoopList.empty() && "LoopList should initially be empty!")(static_cast <bool> (LoopList.empty() && "LoopList should initially be empty!"
) ? void (0) : __assert_fail ("LoopList.empty() && \"LoopList should initially be empty!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 273, __extension__
__PRETTY_FUNCTION__))
;
274 Loop *CurrentLoop = &L;
275 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
276 while (!Vec->empty()) {
277 // The current loop has multiple subloops in it hence it is not tightly
278 // nested.
279 // Discard all loops above it added into Worklist.
280 if (Vec->size() != 1) {
281 LoopList = {};
282 return;
283 }
284
285 LoopList.push_back(CurrentLoop);
286 CurrentLoop = Vec->front();
287 Vec = &CurrentLoop->getSubLoops();
288 }
289 LoopList.push_back(CurrentLoop);
290 return;
291}
292
293namespace {
294
295/// LoopInterchangeLegality checks if it is legal to interchange the loop.
296class LoopInterchangeLegality {
297public:
298 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
299 OptimizationRemarkEmitter *ORE)
300 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
301
302 /// Check if the loops can be interchanged.
303 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
304 CharMatrix &DepMatrix);
305
306 /// Discover induction PHIs in the header of \p L. Induction
307 /// PHIs are added to \p Inductions.
308 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
309
310 /// Check if the loop structure is understood. We do not handle triangular
311 /// loops for now.
312 bool isLoopStructureUnderstood();
313
314 bool currentLimitations();
315
316 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
317 return OuterInnerReductions;
318 }
319
320 const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
321 return InnerLoopInductions;
322 }
323
324private:
325 bool tightlyNested(Loop *Outer, Loop *Inner);
326 bool containsUnsafeInstructions(BasicBlock *BB);
327
328 /// Discover induction and reduction PHIs in the header of \p L. Induction
329 /// PHIs are added to \p Inductions, reductions are added to
330 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
331 /// to be passed as \p InnerLoop.
332 bool findInductionAndReductions(Loop *L,
333 SmallVector<PHINode *, 8> &Inductions,
334 Loop *InnerLoop);
335
336 Loop *OuterLoop;
337 Loop *InnerLoop;
338
339 ScalarEvolution *SE;
340
341 /// Interface to emit optimization remarks.
342 OptimizationRemarkEmitter *ORE;
343
344 /// Set of reduction PHIs taking part of a reduction across the inner and
345 /// outer loop.
346 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
347
348 /// Set of inner loop induction PHIs
349 SmallVector<PHINode *, 8> InnerLoopInductions;
350};
351
352/// LoopInterchangeProfitability checks if it is profitable to interchange the
353/// loop.
354class LoopInterchangeProfitability {
355public:
356 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
357 OptimizationRemarkEmitter *ORE)
358 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
359
360 /// Check if the loop interchange is profitable.
361 bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
362 CharMatrix &DepMatrix);
363
364private:
365 int getInstrOrderCost();
366
367 Loop *OuterLoop;
368 Loop *InnerLoop;
369
370 /// Scev analysis.
371 ScalarEvolution *SE;
372
373 /// Interface to emit optimization remarks.
374 OptimizationRemarkEmitter *ORE;
375};
376
377/// LoopInterchangeTransform interchanges the loop.
378class LoopInterchangeTransform {
379public:
380 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
381 LoopInfo *LI, DominatorTree *DT,
382 const LoopInterchangeLegality &LIL)
383 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
384
385 /// Interchange OuterLoop and InnerLoop.
386 bool transform();
387 void restructureLoops(Loop *NewInner, Loop *NewOuter,
388 BasicBlock *OrigInnerPreHeader,
389 BasicBlock *OrigOuterPreHeader);
390 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
391
392private:
393 bool adjustLoopLinks();
394 bool adjustLoopBranches();
395
396 Loop *OuterLoop;
397 Loop *InnerLoop;
398
399 /// Scev analysis.
400 ScalarEvolution *SE;
401
402 LoopInfo *LI;
403 DominatorTree *DT;
404
405 const LoopInterchangeLegality &LIL;
406};
407
408struct LoopInterchange {
409 ScalarEvolution *SE = nullptr;
410 LoopInfo *LI = nullptr;
411 DependenceInfo *DI = nullptr;
412 DominatorTree *DT = nullptr;
413
414 /// Interface to emit optimization remarks.
415 OptimizationRemarkEmitter *ORE;
416
417 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
418 DominatorTree *DT, OptimizationRemarkEmitter *ORE)
419 : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
420
421 bool run(Loop *L) {
422 if (L->getParentLoop())
423 return false;
424 SmallVector<Loop *, 8> LoopList;
425 populateWorklist(*L, LoopList);
426 return processLoopList(LoopList);
427 }
428
429 bool run(LoopNest &LN) {
430 SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
431 for (unsigned I = 1; I < LoopList.size(); ++I)
432 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
433 return false;
434 return processLoopList(LoopList);
435 }
436
437 bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
438 for (Loop *L : LoopList) {
439 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
440 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
441 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Couldn't compute backedge count\n"
; } } while (false)
;
442 return false;
443 }
444 if (L->getNumBackEdges() != 1) {
445 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "NumBackEdges is not equal to 1\n"
; } } while (false)
;
446 return false;
447 }
448 if (!L->getExitingBlock()) {
449 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't have unique exit block\n"
; } } while (false)
;
450 return false;
451 }
452 }
453 return true;
454 }
455
456 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
457 // TODO: Add a better heuristic to select the loop to be interchanged based
458 // on the dependence matrix. Currently we select the innermost loop.
459 return LoopList.size() - 1;
460 }
461
462 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
463 bool Changed = false;
464 unsigned LoopNestDepth = LoopList.size();
465 if (LoopNestDepth < 2) {
466 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't contain minimum nesting level.\n"
; } } while (false)
;
467 return false;
468 }
469 if (LoopNestDepth > MaxLoopNestDepth) {
470 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
471 << MaxLoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
;
472 return false;
473 }
474 if (!isComputableLoopNest(LoopList)) {
475 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not valid loop candidate for interchange\n"
; } } while (false)
;
476 return false;
477 }
478
479 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepthdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
480 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
;
481
482 CharMatrix DependencyMatrix;
483 Loop *OuterMostLoop = *(LoopList.begin());
484 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
485 OuterMostLoop, DI)) {
486 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Populating dependency matrix failed\n"
; } } while (false)
;
487 return false;
488 }
489#ifdef DUMP_DEP_MATRICIES
490 LLVM_DEBUG(dbgs() << "Dependence before interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence before interchange\n"
; } } while (false)
;
491 printDepMatrix(DependencyMatrix);
492#endif
493
494 // Get the Outermost loop exit.
495 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
496 if (!LoopNestExit) {
497 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "OuterMostLoop needs an unique exit block"
; } } while (false)
;
498 return false;
499 }
500
501 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
502 // We try to achieve the globally optimal memory access for the loopnest,
503 // and do interchange based on a bubble-sort fasion. We start from
504 // the innermost loop, move it outwards to the best possible position
505 // and repeat this process.
506 for (unsigned j = SelecLoopId; j > 0; j--) {
507 bool ChangedPerIter = false;
508 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
509 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
510 DependencyMatrix);
511 if (!Interchanged)
512 continue;
513 // Loops interchanged, update LoopList accordingly.
514 std::swap(LoopList[i - 1], LoopList[i]);
515 // Update the DependencyMatrix
516 interChangeDependencies(DependencyMatrix, i, i - 1);
517#ifdef DUMP_DEP_MATRICIES
518 LLVM_DEBUG(dbgs() << "Dependence after interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence after interchange\n"
; } } while (false)
;
519 printDepMatrix(DependencyMatrix);
520#endif
521 ChangedPerIter |= Interchanged;
522 Changed |= Interchanged;
523 }
524 // Early abort if there was no interchange during an entire round of
525 // moving loops outwards.
526 if (!ChangedPerIter)
527 break;
528 }
529 return Changed;
530 }
531
532 bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
533 unsigned OuterLoopId,
534 std::vector<std::vector<char>> &DependencyMatrix) {
535 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
536 << " and OuterLoopId = " << OuterLoopId << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
;
537 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
538 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
539 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not interchanging loops. Cannot prove legality.\n"
; } } while (false)
;
540 return false;
541 }
542 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are legal to interchange\n"
; } } while (false)
;
543 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
544 if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
545 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Interchanging loops not profitable.\n"
; } } while (false)
;
546 return false;
547 }
548
549 ORE->emit([&]() {
550 return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged",
551 InnerLoop->getStartLoc(),
552 InnerLoop->getHeader())
553 << "Loop interchanged with enclosing loop.";
554 });
555
556 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
557 LIT.transform();
558 LLVM_DEBUG(dbgs() << "Loops interchanged.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops interchanged.\n"
; } } while (false)
;
559 LoopsInterchanged++;
560
561 assert(InnerLoop->isLCSSAForm(*DT) &&(static_cast <bool> (InnerLoop->isLCSSAForm(*DT) &&
"Inner loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("InnerLoop->isLCSSAForm(*DT) && \"Inner loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 562, __extension__
__PRETTY_FUNCTION__))
562 "Inner loop not left in LCSSA form after loop interchange!")(static_cast <bool> (InnerLoop->isLCSSAForm(*DT) &&
"Inner loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("InnerLoop->isLCSSAForm(*DT) && \"Inner loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 562, __extension__
__PRETTY_FUNCTION__))
;
563 assert(OuterLoop->isLCSSAForm(*DT) &&(static_cast <bool> (OuterLoop->isLCSSAForm(*DT) &&
"Outer loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("OuterLoop->isLCSSAForm(*DT) && \"Outer loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 564, __extension__
__PRETTY_FUNCTION__))
564 "Outer loop not left in LCSSA form after loop interchange!")(static_cast <bool> (OuterLoop->isLCSSAForm(*DT) &&
"Outer loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("OuterLoop->isLCSSAForm(*DT) && \"Outer loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 564, __extension__
__PRETTY_FUNCTION__))
;
565
566 return true;
567 }
568};
569
570} // end anonymous namespace
571
572bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
573 return any_of(*BB, [](const Instruction &I) {
574 return I.mayHaveSideEffects() || I.mayReadFromMemory();
575 });
576}
577
578bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
579 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
580 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
581 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
582
583 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking if loops are tightly nested\n"
; } } while (false)
;
584
585 // A perfectly nested loop will not have any branch in between the outer and
586 // inner block i.e. outer header will branch to either inner preheader and
587 // outerloop latch.
588 BranchInst *OuterLoopHeaderBI =
589 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
590 if (!OuterLoopHeaderBI)
591 return false;
592
593 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
594 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
595 Succ != OuterLoopLatch)
596 return false;
597
598 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking instructions in Loop header and Loop latch\n"
; } } while (false)
;
599 // We do not have any basic block in between now make sure the outer header
600 // and outer loop latch doesn't contain any unsafe instructions.
601 if (containsUnsafeInstructions(OuterLoopHeader) ||
602 containsUnsafeInstructions(OuterLoopLatch))
603 return false;
604
605 // Also make sure the inner loop preheader does not contain any unsafe
606 // instructions. Note that all instructions in the preheader will be moved to
607 // the outer loop header when interchanging.
608 if (InnerLoopPreHeader != OuterLoopHeader &&
609 containsUnsafeInstructions(InnerLoopPreHeader))
610 return false;
611
612 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
613 // Ensure the inner loop exit block flows to the outer loop latch possibly
614 // through empty blocks.
615 const BasicBlock &SuccInner =
616 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
617 if (&SuccInner != OuterLoopLatch) {
618 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExitdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
619 << " does not lead to the outer loop latch.\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
;
620 return false;
621 }
622 // The inner loop exit block does flow to the outer loop latch and not some
623 // other BBs, now make sure it contains safe instructions, since it will be
624 // moved into the (new) inner loop after interchange.
625 if (containsUnsafeInstructions(InnerLoopExit))
626 return false;
627
628 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are perfectly nested\n"
; } } while (false)
;
629 // We have a perfect loop nest.
630 return true;
631}
632
633bool LoopInterchangeLegality::isLoopStructureUnderstood() {
634 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
635 for (PHINode *InnerInduction : InnerLoopInductions) {
11
Assuming '__begin1' is equal to '__end1'
636 unsigned Num = InnerInduction->getNumOperands();
637 for (unsigned i = 0; i < Num; ++i) {
638 Value *Val = InnerInduction->getOperand(i);
639 if (isa<Constant>(Val))
640 continue;
641 Instruction *I = dyn_cast<Instruction>(Val);
642 if (!I)
643 return false;
644 // TODO: Handle triangular loops.
645 // e.g. for(int i=0;i<N;i++)
646 // for(int j=i;j<N;j++)
647 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
648 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
649 InnerLoopPreheader &&
650 !OuterLoop->isLoopInvariant(I)) {
651 return false;
652 }
653 }
654 }
655
656 // TODO: Handle triangular loops of another form.
657 // e.g. for(int i=0;i<N;i++)
658 // for(int j=0;j<i;j++)
659 // or,
660 // for(int i=0;i<N;i++)
661 // for(int j=0;j*i<N;j++)
662 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
663 BranchInst *InnerLoopLatchBI =
13
'InnerLoopLatchBI' initialized to a null pointer value
664 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
12
Assuming the object is not a 'BranchInst'
665 if (!InnerLoopLatchBI->isConditional())
14
Called C++ object pointer is null
666 return false;
667 if (CmpInst *InnerLoopCmp =
668 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
669 Value *Op0 = InnerLoopCmp->getOperand(0);
670 Value *Op1 = InnerLoopCmp->getOperand(1);
671
672 // LHS and RHS of the inner loop exit condition, e.g.,
673 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
674 Value *Left = nullptr;
675 Value *Right = nullptr;
676
677 // Check if V only involves inner loop induction variable.
678 // Return true if V is InnerInduction, or a cast from
679 // InnerInduction, or a binary operator that involves
680 // InnerInduction and a constant.
681 std::function<bool(Value *)> IsPathToInnerIndVar;
682 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
683 if (llvm::is_contained(InnerLoopInductions, V))
684 return true;
685 if (isa<Constant>(V))
686 return true;
687 const Instruction *I = dyn_cast<Instruction>(V);
688 if (!I)
689 return false;
690 if (isa<CastInst>(I))
691 return IsPathToInnerIndVar(I->getOperand(0));
692 if (isa<BinaryOperator>(I))
693 return IsPathToInnerIndVar(I->getOperand(0)) &&
694 IsPathToInnerIndVar(I->getOperand(1));
695 return false;
696 };
697
698 // In case of multiple inner loop indvars, it is okay if LHS and RHS
699 // are both inner indvar related variables.
700 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
701 return true;
702
703 // Otherwise we check if the cmp instruction compares an inner indvar
704 // related variable (Left) with a outer loop invariant (Right).
705 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
706 Left = Op0;
707 Right = Op1;
708 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
709 Left = Op1;
710 Right = Op0;
711 }
712
713 if (Left == nullptr)
714 return false;
715
716 const SCEV *S = SE->getSCEV(Right);
717 if (!SE->isLoopInvariant(S, OuterLoop))
718 return false;
719 }
720
721 return true;
722}
723
724// If SV is a LCSSA PHI node with a single incoming value, return the incoming
725// value.
726static Value *followLCSSA(Value *SV) {
727 PHINode *PHI = dyn_cast<PHINode>(SV);
728 if (!PHI)
729 return SV;
730
731 if (PHI->getNumIncomingValues() != 1)
732 return SV;
733 return followLCSSA(PHI->getIncomingValue(0));
734}
735
736// Check V's users to see if it is involved in a reduction in L.
737static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
738 // Reduction variables cannot be constants.
739 if (isa<Constant>(V))
740 return nullptr;
741
742 for (Value *User : V->users()) {
743 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
744 if (PHI->getNumIncomingValues() == 1)
745 continue;
746 RecurrenceDescriptor RD;
747 if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
748 // Detect floating point reduction only when it can be reordered.
749 if (RD.getExactFPMathInst() != nullptr)
750 return nullptr;
751 return PHI;
752 }
753 return nullptr;
754 }
755 }
756
757 return nullptr;
758}
759
760bool LoopInterchangeLegality::findInductionAndReductions(
761 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
762 if (!L->getLoopLatch() || !L->getLoopPredecessor())
763 return false;
764 for (PHINode &PHI : L->getHeader()->phis()) {
765 RecurrenceDescriptor RD;
766 InductionDescriptor ID;
767 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
768 Inductions.push_back(&PHI);
769 else {
770 // PHIs in inner loops need to be part of a reduction in the outer loop,
771 // discovered when checking the PHIs of the outer loop earlier.
772 if (!InnerLoop) {
773 if (!OuterInnerReductions.count(&PHI)) {
774 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
775 "across the outer loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
;
776 return false;
777 }
778 } else {
779 assert(PHI.getNumIncomingValues() == 2 &&(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 780, __extension__
__PRETTY_FUNCTION__))
780 "Phis in loop header should have exactly 2 incoming values")(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 780, __extension__
__PRETTY_FUNCTION__))
;
781 // Check if we have a PHI node in the outer loop that has a reduction
782 // result from the inner loop as an incoming value.
783 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
784 PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
785 if (!InnerRedPhi ||
786 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
787 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
788 dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
789 << "Failed to recognize PHI as an induction or reduction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
;
790 return false;
791 }
792 OuterInnerReductions.insert(&PHI);
793 OuterInnerReductions.insert(InnerRedPhi);
794 }
795 }
796 }
797 return true;
798}
799
800// This function indicates the current limitations in the transform as a result
801// of which we do not proceed.
802bool LoopInterchangeLegality::currentLimitations() {
803 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
804
805 // transform currently expects the loop latches to also be the exiting
806 // blocks.
807 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
1
Assuming the condition is false
5
Taking false branch
808 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
2
Assuming the condition is false
809 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
3
Assuming the object is a 'BranchInst'
810 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
4
Assuming the object is a 'BranchInst'
811 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
812 dbgs() << "Loops where the latch is not the exiting block are not"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
813 << " supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
;
814 ORE->emit([&]() {
815 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ExitingNotLatch",
816 OuterLoop->getStartLoc(),
817 OuterLoop->getHeader())
818 << "Loops where the latch is not the exiting block cannot be"
819 " interchange currently.";
820 });
821 return true;
822 }
823
824 SmallVector<PHINode *, 8> Inductions;
825 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
6
Assuming the condition is false
7
Taking false branch
826 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
827 dbgs() << "Only outer loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
828 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
829 ORE->emit([&]() {
830 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter",
831 OuterLoop->getStartLoc(),
832 OuterLoop->getHeader())
833 << "Only outer loops with induction or reduction PHI nodes can be"
834 " interchanged currently.";
835 });
836 return true;
837 }
838
839 Inductions.clear();
840 if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
8
Assuming the condition is false
9
Taking false branch
841 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
842 dbgs() << "Only inner loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
843 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
844 ORE->emit([&]() {
845 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner",
846 InnerLoop->getStartLoc(),
847 InnerLoop->getHeader())
848 << "Only inner loops with induction or reduction PHI nodes can be"
849 " interchange currently.";
850 });
851 return true;
852 }
853
854 // TODO: Triangular loops are not handled for now.
855 if (!isLoopStructureUnderstood()) {
10
Calling 'LoopInterchangeLegality::isLoopStructureUnderstood'
856 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop structure not understood by pass\n"
; } } while (false)
;
857 ORE->emit([&]() {
858 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner",
859 InnerLoop->getStartLoc(),
860 InnerLoop->getHeader())
861 << "Inner loop structure not understood currently.";
862 });
863 return true;
864 }
865
866 return false;
867}
868
869bool LoopInterchangeLegality::findInductions(
870 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
871 for (PHINode &PHI : L->getHeader()->phis()) {
872 InductionDescriptor ID;
873 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
874 Inductions.push_back(&PHI);
875 }
876 return !Inductions.empty();
877}
878
879// We currently only support LCSSA PHI nodes in the inner loop exit, if their
880// users are either reduction PHIs or PHIs outside the outer loop (which means
881// the we are only interested in the final value after the loop).
882static bool
883areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
884 SmallPtrSetImpl<PHINode *> &Reductions) {
885 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
886 for (PHINode &PHI : InnerExit->phis()) {
887 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
888 if (PHI.getNumIncomingValues() > 1)
889 return false;
890 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
891 PHINode *PN = dyn_cast<PHINode>(U);
892 return !PN ||
893 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
894 })) {
895 return false;
896 }
897 }
898 return true;
899}
900
901// We currently support LCSSA PHI nodes in the outer loop exit, if their
902// incoming values do not come from the outer loop latch or if the
903// outer loop latch has a single predecessor. In that case, the value will
904// be available if both the inner and outer loop conditions are true, which
905// will still be true after interchanging. If we have multiple predecessor,
906// that may not be the case, e.g. because the outer loop latch may be executed
907// if the inner loop is not executed.
908static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
909 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
910 for (PHINode &PHI : LoopNestExit->phis()) {
911 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
912 Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
913 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
914 continue;
915
916 // The incoming value is defined in the outer loop latch. Currently we
917 // only support that in case the outer loop latch has a single predecessor.
918 // This guarantees that the outer loop latch is executed if and only if
919 // the inner loop is executed (because tightlyNested() guarantees that the
920 // outer loop header only branches to the inner loop or the outer loop
921 // latch).
922 // FIXME: We could weaken this logic and allow multiple predecessors,
923 // if the values are produced outside the loop latch. We would need
924 // additional logic to update the PHI nodes in the exit block as
925 // well.
926 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
927 return false;
928 }
929 }
930 return true;
931}
932
933// In case of multi-level nested loops, it may occur that lcssa phis exist in
934// the latch of InnerLoop, i.e., when defs of the incoming values are further
935// inside the loopnest. Sometimes those incoming values are not available
936// after interchange, since the original inner latch will become the new outer
937// latch which may have predecessor paths that do not include those incoming
938// values.
939// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
940// multi-level loop nests.
941static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
942 if (InnerLoop->getSubLoops().empty())
943 return true;
944 // If the original outer latch has only one predecessor, then values defined
945 // further inside the looploop, e.g., in the innermost loop, will be available
946 // at the new outer latch after interchange.
947 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
948 return true;
949
950 // The outer latch has more than one predecessors, i.e., the inner
951 // exit and the inner header.
952 // PHI nodes in the inner latch are lcssa phis where the incoming values
953 // are defined further inside the loopnest. Check if those phis are used
954 // in the original inner latch. If that is the case then bail out since
955 // those incoming values may not be available at the new outer latch.
956 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
957 for (PHINode &PHI : InnerLoopLatch->phis()) {
958 for (auto *U : PHI.users()) {
959 Instruction *UI = cast<Instruction>(U);
960 if (InnerLoopLatch == UI->getParent())
961 return false;
962 }
963 }
964 return true;
965}
966
967bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
968 unsigned OuterLoopId,
969 CharMatrix &DepMatrix) {
970 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
971 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
972 << " and OuterLoopId = " << OuterLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
973 << " due to dependence\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
;
974 ORE->emit([&]() {
975 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence",
976 InnerLoop->getStartLoc(),
977 InnerLoop->getHeader())
978 << "Cannot interchange loops due to dependences.";
979 });
980 return false;
981 }
982 // Check if outer and inner loop contain legal instructions only.
983 for (auto *BB : OuterLoop->blocks())
984 for (Instruction &I : BB->instructionsWithoutDebug())
985 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
986 // readnone functions do not prevent interchanging.
987 if (CI->onlyWritesMemory())
988 continue;
989 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
990 dbgs() << "Loops with call instructions cannot be interchanged "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
991 << "safely.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
;
992 ORE->emit([&]() {
993 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "CallInst",
994 CI->getDebugLoc(),
995 CI->getParent())
996 << "Cannot interchange loops due to call instruction.";
997 });
998
999 return false;
1000 }
1001
1002 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1003 LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cound not find inner loop induction variables.\n"
; } } while (false)
;
1004 return false;
1005 }
1006
1007 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
1008 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"
; } } while (false)
;
1009 ORE->emit([&]() {
1010 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedInnerLatchPHI",
1011 InnerLoop->getStartLoc(),
1012 InnerLoop->getHeader())
1013 << "Cannot interchange loops because unsupported PHI nodes found "
1014 "in inner loop latch.";
1015 });
1016 return false;
1017 }
1018
1019 // TODO: The loops could not be interchanged due to current limitations in the
1020 // transform module.
1021 if (currentLimitations()) {
1022 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not legal because of current transform limitation\n"
; } } while (false)
;
1023 return false;
1024 }
1025
1026 // Check if the loops are tightly nested.
1027 if (!tightlyNested(OuterLoop, InnerLoop)) {
1028 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops not tightly nested\n"
; } } while (false)
;
1029 ORE->emit([&]() {
1030 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested",
1031 InnerLoop->getStartLoc(),
1032 InnerLoop->getHeader())
1033 << "Cannot interchange loops because they are not tightly "
1034 "nested.";
1035 });
1036 return false;
1037 }
1038
1039 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1040 OuterInnerReductions)) {
1041 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"
; } } while (false)
;
1042 ORE->emit([&]() {
1043 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1044 InnerLoop->getStartLoc(),
1045 InnerLoop->getHeader())
1046 << "Found unsupported PHI node in loop exit.";
1047 });
1048 return false;
1049 }
1050
1051 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1052 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"
; } } while (false)
;
1053 ORE->emit([&]() {
1054 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1055 OuterLoop->getStartLoc(),
1056 OuterLoop->getHeader())
1057 << "Found unsupported PHI node in loop exit.";
1058 });
1059 return false;
1060 }
1061
1062 return true;
1063}
1064
1065int LoopInterchangeProfitability::getInstrOrderCost() {
1066 unsigned GoodOrder, BadOrder;
1067 BadOrder = GoodOrder = 0;
1068 for (BasicBlock *BB : InnerLoop->blocks()) {
1069 for (Instruction &Ins : *BB) {
1070 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1071 unsigned NumOp = GEP->getNumOperands();
1072 bool FoundInnerInduction = false;
1073 bool FoundOuterInduction = false;
1074 for (unsigned i = 0; i < NumOp; ++i) {
1075 // Skip operands that are not SCEV-able.
1076 if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1077 continue;
1078
1079 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1080 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1081 if (!AR)
1082 continue;
1083
1084 // If we find the inner induction after an outer induction e.g.
1085 // for(int i=0;i<N;i++)
1086 // for(int j=0;j<N;j++)
1087 // A[i][j] = A[i-1][j-1]+k;
1088 // then it is a good order.
1089 if (AR->getLoop() == InnerLoop) {
1090 // We found an InnerLoop induction after OuterLoop induction. It is
1091 // a good order.
1092 FoundInnerInduction = true;
1093 if (FoundOuterInduction) {
1094 GoodOrder++;
1095 break;
1096 }
1097 }
1098 // If we find the outer induction after an inner induction e.g.
1099 // for(int i=0;i<N;i++)
1100 // for(int j=0;j<N;j++)
1101 // A[j][i] = A[j-1][i-1]+k;
1102 // then it is a bad order.
1103 if (AR->getLoop() == OuterLoop) {
1104 // We found an OuterLoop induction after InnerLoop induction. It is
1105 // a bad order.
1106 FoundOuterInduction = true;
1107 if (FoundInnerInduction) {
1108 BadOrder++;
1109 break;
1110 }
1111 }
1112 }
1113 }
1114 }
1115 }
1116 return GoodOrder - BadOrder;
1117}
1118
1119static bool isProfitableForVectorization(unsigned InnerLoopId,
1120 unsigned OuterLoopId,
1121 CharMatrix &DepMatrix) {
1122 // TODO: Improve this heuristic to catch more cases.
1123 // If the inner loop is loop independent or doesn't carry any dependency it is
1124 // profitable to move this to outer position.
1125 for (auto &Row : DepMatrix) {
1126 if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1127 return false;
1128 // TODO: We need to improve this heuristic.
1129 if (Row[OuterLoopId] != '=')
1130 return false;
1131 }
1132 // If outer loop has dependence and inner loop is loop independent then it is
1133 // profitable to interchange to enable parallelism.
1134 // If there are no dependences, interchanging will not improve anything.
1135 return !DepMatrix.empty();
1136}
1137
1138bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1139 unsigned OuterLoopId,
1140 CharMatrix &DepMatrix) {
1141 // TODO: Add better profitability checks.
1142 // e.g
1143 // 1) Construct dependency matrix and move the one with no loop carried dep
1144 // inside to enable vectorization.
1145
1146 // This is rough cost estimation algorithm. It counts the good and bad order
1147 // of induction variables in the instruction and allows reordering if number
1148 // of bad orders is more than good.
1149 int Cost = getInstrOrderCost();
1150 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cost = " << Cost
<< "\n"; } } while (false)
;
1151 if (Cost < -LoopInterchangeCostThreshold)
1152 return true;
1153
1154 // It is not profitable as per current cache profitability model. But check if
1155 // we can move this loop outside to improve parallelism.
1156 if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1157 return true;
1158
1159 ORE->emit([&]() {
1160 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable",
1161 InnerLoop->getStartLoc(),
1162 InnerLoop->getHeader())
1163 << "Interchanging loops is too costly (cost="
1164 << ore::NV("Cost", Cost) << ", threshold="
1165 << ore::NV("Threshold", LoopInterchangeCostThreshold)
1166 << ") and it does not improve parallelism.";
1167 });
1168 return false;
1169}
1170
1171void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1172 Loop *InnerLoop) {
1173 for (Loop *L : *OuterLoop)
1174 if (L == InnerLoop) {
1175 OuterLoop->removeChildLoop(L);
1176 return;
1177 }
1178 llvm_unreachable("Couldn't find loop")::llvm::llvm_unreachable_internal("Couldn't find loop", "llvm/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1178)
;
1179}
1180
1181/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1182/// new inner and outer loop after interchanging: NewInner is the original
1183/// outer loop and NewOuter is the original inner loop.
1184///
1185/// Before interchanging, we have the following structure
1186/// Outer preheader
1187// Outer header
1188// Inner preheader
1189// Inner header
1190// Inner body
1191// Inner latch
1192// outer bbs
1193// Outer latch
1194//
1195// After interchanging:
1196// Inner preheader
1197// Inner header
1198// Outer preheader
1199// Outer header
1200// Inner body
1201// outer bbs
1202// Outer latch
1203// Inner latch
1204void LoopInterchangeTransform::restructureLoops(
1205 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1206 BasicBlock *OrigOuterPreHeader) {
1207 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1208 // The original inner loop preheader moves from the new inner loop to
1209 // the parent loop, if there is one.
1210 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1211 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1212
1213 // Switch the loop levels.
1214 if (OuterLoopParent) {
1215 // Remove the loop from its parent loop.
1216 removeChildLoop(OuterLoopParent, NewInner);
1217 removeChildLoop(NewInner, NewOuter);
1218 OuterLoopParent->addChildLoop(NewOuter);
1219 } else {
1220 removeChildLoop(NewInner, NewOuter);
1221 LI->changeTopLevelLoop(NewInner, NewOuter);
1222 }
1223 while (!NewOuter->isInnermost())
1224 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1225 NewOuter->addChildLoop(NewInner);
1226
1227 // BBs from the original inner loop.
1228 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1229
1230 // Add BBs from the original outer loop to the original inner loop (excluding
1231 // BBs already in inner loop)
1232 for (BasicBlock *BB : NewInner->blocks())
1233 if (LI->getLoopFor(BB) == NewInner)
1234 NewOuter->addBlockEntry(BB);
1235
1236 // Now remove inner loop header and latch from the new inner loop and move
1237 // other BBs (the loop body) to the new inner loop.
1238 BasicBlock *OuterHeader = NewOuter->getHeader();
1239 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1240 for (BasicBlock *BB : OrigInnerBBs) {
1241 // Nothing will change for BBs in child loops.
1242 if (LI->getLoopFor(BB) != NewOuter)
1243 continue;
1244 // Remove the new outer loop header and latch from the new inner loop.
1245 if (BB == OuterHeader || BB == OuterLatch)
1246 NewInner->removeBlockFromLoop(BB);
1247 else
1248 LI->changeLoopFor(BB, NewInner);
1249 }
1250
1251 // The preheader of the original outer loop becomes part of the new
1252 // outer loop.
1253 NewOuter->addBlockEntry(OrigOuterPreHeader);
1254 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1255
1256 // Tell SE that we move the loops around.
1257 SE->forgetLoop(NewOuter);
1258 SE->forgetLoop(NewInner);
1259}
1260
1261bool LoopInterchangeTransform::transform() {
1262 bool Transformed = false;
1263
1264 if (InnerLoop->getSubLoops().empty()) {
1265 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1266 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Splitting the inner loop latch\n"
; } } while (false)
;
1267 auto &InductionPHIs = LIL.getInnerLoopInductions();
1268 if (InductionPHIs.empty()) {
1269 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to find the point to split loop latch \n"
; } } while (false)
;
1270 return false;
1271 }
1272
1273 SmallVector<Instruction *, 8> InnerIndexVarList;
1274 for (PHINode *CurInductionPHI : InductionPHIs) {
1275 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1276 InnerIndexVarList.push_back(
1277 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1278 else
1279 InnerIndexVarList.push_back(
1280 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1281 }
1282
1283 // Create a new latch block for the inner loop. We split at the
1284 // current latch's terminator and then move the condition and all
1285 // operands that are not either loop-invariant or the induction PHI into the
1286 // new latch block.
1287 BasicBlock *NewLatch =
1288 SplitBlock(InnerLoop->getLoopLatch(),
1289 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1290
1291 SmallSetVector<Instruction *, 4> WorkList;
1292 unsigned i = 0;
1293 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1294 for (; i < WorkList.size(); i++) {
1295 // Duplicate instruction and move it the new latch. Update uses that
1296 // have been moved.
1297 Instruction *NewI = WorkList[i]->clone();
1298 NewI->insertBefore(NewLatch->getFirstNonPHI());
1299 assert(!NewI->mayHaveSideEffects() &&(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1301, __extension__
__PRETTY_FUNCTION__))
1300 "Moving instructions with side-effects may change behavior of "(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1301, __extension__
__PRETTY_FUNCTION__))
1301 "the loop nest!")(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1301, __extension__
__PRETTY_FUNCTION__))
;
1302 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1303 Instruction *UserI = cast<Instruction>(U.getUser());
1304 if (!InnerLoop->contains(UserI->getParent()) ||
1305 UserI->getParent() == NewLatch ||
1306 llvm::is_contained(InductionPHIs, UserI))
1307 U.set(NewI);
1308 }
1309 // Add operands of moved instruction to the worklist, except if they are
1310 // outside the inner loop or are the induction PHI.
1311 for (Value *Op : WorkList[i]->operands()) {
1312 Instruction *OpI = dyn_cast<Instruction>(Op);
1313 if (!OpI ||
1314 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1315 llvm::is_contained(InductionPHIs, OpI))
1316 continue;
1317 WorkList.insert(OpI);
1318 }
1319 }
1320 };
1321
1322 // FIXME: Should we interchange when we have a constant condition?
1323 Instruction *CondI = dyn_cast<Instruction>(
1324 cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1325 ->getCondition());
1326 if (CondI)
1327 WorkList.insert(CondI);
1328 MoveInstructions();
1329 for (Instruction *InnerIndexVar : InnerIndexVarList)
1330 WorkList.insert(cast<Instruction>(InnerIndexVar));
1331 MoveInstructions();
1332
1333 // Splits the inner loops phi nodes out into a separate basic block.
1334 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1335 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1336 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "splitting InnerLoopHeader done\n"
; } } while (false)
;
1337 }
1338
1339 // Instructions in the original inner loop preheader may depend on values
1340 // defined in the outer loop header. Move them there, because the original
1341 // inner loop preheader will become the entry into the interchanged loop nest.
1342 // Currently we move all instructions and rely on LICM to move invariant
1343 // instructions outside the loop nest.
1344 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1345 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1346 if (InnerLoopPreHeader != OuterLoopHeader) {
1347 SmallPtrSet<Instruction *, 4> NeedsMoving;
1348 for (Instruction &I :
1349 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1350 std::prev(InnerLoopPreHeader->end()))))
1351 I.moveBefore(OuterLoopHeader->getTerminator());
1352 }
1353
1354 Transformed |= adjustLoopLinks();
1355 if (!Transformed) {
1356 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopLinks failed\n"
; } } while (false)
;
1357 return false;
1358 }
1359
1360 return true;
1361}
1362
1363/// \brief Move all instructions except the terminator from FromBB right before
1364/// InsertBefore
1365static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1366 auto &ToList = InsertBefore->getParent()->getInstList();
1367 auto &FromList = FromBB->getInstList();
1368
1369 ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1370 FromBB->getTerminator()->getIterator());
1371}
1372
1373/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1374static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1375 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1376 // from BB1 afterwards.
1377 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1378 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1379 for (Instruction *I : TempInstrs)
1380 I->removeFromParent();
1381
1382 // Move instructions from BB2 to BB1.
1383 moveBBContents(BB2, BB1->getTerminator());
1384
1385 // Move instructions from TempInstrs to BB2.
1386 for (Instruction *I : TempInstrs)
1387 I->insertBefore(BB2->getTerminator());
1388}
1389
1390// Update BI to jump to NewBB instead of OldBB. Records updates to the
1391// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1392// \p OldBB is exactly once in BI's successor list.
1393static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1394 BasicBlock *NewBB,
1395 std::vector<DominatorTree::UpdateType> &DTUpdates,
1396 bool MustUpdateOnce = true) {
1397 assert((!MustUpdateOnce ||(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1401, __extension__
__PRETTY_FUNCTION__))
1398 llvm::count_if(successors(BI),(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1401, __extension__
__PRETTY_FUNCTION__))
1399 [OldBB](BasicBlock *BB) {(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1401, __extension__
__PRETTY_FUNCTION__))
1400 return BB == OldBB;(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1401, __extension__
__PRETTY_FUNCTION__))
1401 }) == 1) && "BI must jump to OldBB exactly once.")(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1401, __extension__
__PRETTY_FUNCTION__))
;
1402 bool Changed = false;
1403 for (Use &Op : BI->operands())
1404 if (Op == OldBB) {
1405 Op.set(NewBB);
1406 Changed = true;
1407 }
1408
1409 if (Changed) {
1410 DTUpdates.push_back(
1411 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1412 DTUpdates.push_back(
1413 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1414 }
1415 assert(Changed && "Expected a successor to be updated")(static_cast <bool> (Changed && "Expected a successor to be updated"
) ? void (0) : __assert_fail ("Changed && \"Expected a successor to be updated\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1415, __extension__
__PRETTY_FUNCTION__))
;
1416}
1417
1418// Move Lcssa PHIs to the right place.
1419static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1420 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1421 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1422 Loop *InnerLoop, LoopInfo *LI) {
1423
1424 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1425 // defined either in the header or latch. Those blocks will become header and
1426 // latch of the new outer loop, and the only possible users can PHI nodes
1427 // in the exit block of the loop nest or the outer loop header (reduction
1428 // PHIs, in that case, the incoming value must be defined in the inner loop
1429 // header). We can just substitute the user with the incoming value and remove
1430 // the PHI.
1431 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1432 assert(P.getNumIncomingValues() == 1 &&(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1433, __extension__
__PRETTY_FUNCTION__))
1433 "Only loops with a single exit are supported!")(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1433, __extension__
__PRETTY_FUNCTION__))
;
1434
1435 // Incoming values are guaranteed be instructions currently.
1436 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1437 // In case of multi-level nested loops, follow LCSSA to find the incoming
1438 // value defined from the innermost loop.
1439 auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1440 // Skip phis with incoming values from the inner loop body, excluding the
1441 // header and latch.
1442 if (IncIInnerMost->getParent() != InnerLatch &&
1443 IncIInnerMost->getParent() != InnerHeader)
1444 continue;
1445
1446 assert(all_of(P.users(),(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1454, __extension__
__PRETTY_FUNCTION__))
1447 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1454, __extension__
__PRETTY_FUNCTION__))
1448 return (cast<PHINode>(U)->getParent() == OuterHeader &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1454, __extension__
__PRETTY_FUNCTION__))
1449 IncI->getParent() == InnerHeader) ||(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1454, __extension__
__PRETTY_FUNCTION__))
1450 cast<PHINode>(U)->getParent() == OuterExit;(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1454, __extension__
__PRETTY_FUNCTION__))
1451 }) &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1454, __extension__
__PRETTY_FUNCTION__))
1452 "Can only replace phis iff the uses are in the loop nest exit or "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1454, __extension__
__PRETTY_FUNCTION__))
1453 "the incoming value is defined in the inner header (it will "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1454, __extension__
__PRETTY_FUNCTION__))
1454 "dominate all loop blocks after interchanging)")(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1454, __extension__
__PRETTY_FUNCTION__))
;
1455 P.replaceAllUsesWith(IncI);
1456 P.eraseFromParent();
1457 }
1458
1459 SmallVector<PHINode *, 8> LcssaInnerExit;
1460 for (PHINode &P : InnerExit->phis())
1461 LcssaInnerExit.push_back(&P);
1462
1463 SmallVector<PHINode *, 8> LcssaInnerLatch;
1464 for (PHINode &P : InnerLatch->phis())
1465 LcssaInnerLatch.push_back(&P);
1466
1467 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1468 // If a PHI node has users outside of InnerExit, it has a use outside the
1469 // interchanged loop and we have to preserve it. We move these to
1470 // InnerLatch, which will become the new exit block for the innermost
1471 // loop after interchanging.
1472 for (PHINode *P : LcssaInnerExit)
1473 P->moveBefore(InnerLatch->getFirstNonPHI());
1474
1475 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1476 // and we have to move them to the new inner latch.
1477 for (PHINode *P : LcssaInnerLatch)
1478 P->moveBefore(InnerExit->getFirstNonPHI());
1479
1480 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1481 // incoming values defined in the outer loop, we have to add a new PHI
1482 // in the inner loop latch, which became the exit block of the outer loop,
1483 // after interchanging.
1484 if (OuterExit) {
1485 for (PHINode &P : OuterExit->phis()) {
1486 if (P.getNumIncomingValues() != 1)
1487 continue;
1488 // Skip Phis with incoming values defined in the inner loop. Those should
1489 // already have been updated.
1490 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1491 if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1492 continue;
1493
1494 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1495 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1496 NewPhi->setIncomingBlock(0, OuterLatch);
1497 // We might have incoming edges from other BBs, i.e., the original outer
1498 // header.
1499 for (auto *Pred : predecessors(InnerLatch)) {
1500 if (Pred == OuterLatch)
1501 continue;
1502 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1503 }
1504 NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1505 P.setIncomingValue(0, NewPhi);
1506 }
1507 }
1508
1509 // Now adjust the incoming blocks for the LCSSA PHIs.
1510 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1511 // with the new latch.
1512 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1513}
1514
1515bool LoopInterchangeTransform::adjustLoopBranches() {
1516 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopBranches called\n"
; } } while (false)
;
1517 std::vector<DominatorTree::UpdateType> DTUpdates;
1518
1519 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1520 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1521
1522 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1524, __extension__
__PRETTY_FUNCTION__))
1523 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1524, __extension__
__PRETTY_FUNCTION__))
1524 InnerLoopPreHeader && "Guaranteed by loop-simplify form")(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1524, __extension__
__PRETTY_FUNCTION__))
;
1525 // Ensure that both preheaders do not contain PHI nodes and have single
1526 // predecessors. This allows us to move them easily. We use
1527 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1528 // preheaders do not satisfy those conditions.
1529 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1530 !OuterLoopPreHeader->getUniquePredecessor())
1531 OuterLoopPreHeader =
1532 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1533 if (InnerLoopPreHeader == OuterLoop->getHeader())
1534 InnerLoopPreHeader =
1535 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1536
1537 // Adjust the loop preheader
1538 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1539 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1540 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1541 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1542 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1543 BasicBlock *InnerLoopLatchPredecessor =
1544 InnerLoopLatch->getUniquePredecessor();
1545 BasicBlock *InnerLoopLatchSuccessor;
1546 BasicBlock *OuterLoopLatchSuccessor;
1547
1548 BranchInst *OuterLoopLatchBI =
1549 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1550 BranchInst *InnerLoopLatchBI =
1551 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1552 BranchInst *OuterLoopHeaderBI =
1553 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1554 BranchInst *InnerLoopHeaderBI =
1555 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1556
1557 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1558 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1559 !InnerLoopHeaderBI)
1560 return false;
1561
1562 BranchInst *InnerLoopLatchPredecessorBI =
1563 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1564 BranchInst *OuterLoopPredecessorBI =
1565 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1566
1567 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1568 return false;
1569 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1570 if (!InnerLoopHeaderSuccessor)
1571 return false;
1572
1573 // Adjust Loop Preheader and headers.
1574 // The branches in the outer loop predecessor and the outer loop header can
1575 // be unconditional branches or conditional branches with duplicates. Consider
1576 // this when updating the successors.
1577 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1578 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1579 // The outer loop header might or might not branch to the outer latch.
1580 // We are guaranteed to branch to the inner loop preheader.
1581 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1582 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1583 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1584 DTUpdates,
1585 /*MustUpdateOnce=*/false);
1586 }
1587 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1588 InnerLoopHeaderSuccessor, DTUpdates,
1589 /*MustUpdateOnce=*/false);
1590
1591 // Adjust reduction PHI's now that the incoming block has changed.
1592 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1593 OuterLoopHeader);
1594
1595 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1596 OuterLoopPreHeader, DTUpdates);
1597
1598 // -------------Adjust loop latches-----------
1599 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1600 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1601 else
1602 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1603
1604 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1605 InnerLoopLatchSuccessor, DTUpdates);
1606
1607 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1608 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1609 else
1610 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1611
1612 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1613 OuterLoopLatchSuccessor, DTUpdates);
1614 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1615 DTUpdates);
1616
1617 DT->applyUpdates(DTUpdates);
1618 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1619 OuterLoopPreHeader);
1620
1621 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1622 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1623 InnerLoop, LI);
1624 // For PHIs in the exit block of the outer loop, outer's latch has been
1625 // replaced by Inners'.
1626 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1627
1628 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1629 // Now update the reduction PHIs in the inner and outer loop headers.
1630 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1631 for (PHINode &PHI : InnerLoopHeader->phis())
1632 if (OuterInnerReductions.contains(&PHI))
1633 InnerLoopPHIs.push_back(&PHI);
1634
1635 for (PHINode &PHI : OuterLoopHeader->phis())
1636 if (OuterInnerReductions.contains(&PHI))
1637 OuterLoopPHIs.push_back(&PHI);
1638
1639 // Now move the remaining reduction PHIs from outer to inner loop header and
1640 // vice versa. The PHI nodes must be part of a reduction across the inner and
1641 // outer loop and all the remains to do is and updating the incoming blocks.
1642 for (PHINode *PHI : OuterLoopPHIs) {
1643 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Outer loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1644 PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1645 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1645, __extension__
__PRETTY_FUNCTION__))
;
1646 }
1647 for (PHINode *PHI : InnerLoopPHIs) {
1648 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1649 PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1650 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1650, __extension__
__PRETTY_FUNCTION__))
;
1651 }
1652
1653 // Update the incoming blocks for moved PHI nodes.
1654 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1655 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1656 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1657 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1658
1659 // Values defined in the outer loop header could be used in the inner loop
1660 // latch. In that case, we need to create LCSSA phis for them, because after
1661 // interchanging they will be defined in the new inner loop and used in the
1662 // new outer loop.
1663 IRBuilder<> Builder(OuterLoopHeader->getContext());
1664 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1665 for (Instruction &I :
1666 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1667 MayNeedLCSSAPhis.push_back(&I);
1668 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1669
1670 return true;
1671}
1672
1673bool LoopInterchangeTransform::adjustLoopLinks() {
1674 // Adjust all branches in the inner and outer loop.
1675 bool Changed = adjustLoopBranches();
1676 if (Changed) {
1677 // We have interchanged the preheaders so we need to interchange the data in
1678 // the preheaders as well. This is because the content of the inner
1679 // preheader was previously executed inside the outer loop.
1680 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1681 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1682 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1683 }
1684 return Changed;
1685}
1686
1687namespace {
1688/// Main LoopInterchange Pass.
1689struct LoopInterchangeLegacyPass : public LoopPass {
1690 static char ID;
1691
1692 LoopInterchangeLegacyPass() : LoopPass(ID) {
1693 initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
1694 }
1695
1696 void getAnalysisUsage(AnalysisUsage &AU) const override {
1697 AU.addRequired<DependenceAnalysisWrapperPass>();
1698 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1699
1700 getLoopAnalysisUsage(AU);
1701 }
1702
1703 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1704 if (skipLoop(L))
1705 return false;
1706
1707 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1708 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1709 auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1710 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1711 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1712
1713 return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
1714 }
1715};
1716} // namespace
1717
1718char LoopInterchangeLegacyPass::ID = 0;
1719
1720INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1721 "Interchanges loops for cache reuse", false, false)static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1722INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
1723INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)initializeDependenceAnalysisWrapperPassPass(Registry);
1724INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)initializeOptimizationRemarkEmitterWrapperPassPass(Registry);
1725
1726INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1727 "Interchanges loops for cache reuse", false, false)PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1728
1729Pass *llvm::createLoopInterchangePass() {
1730 return new LoopInterchangeLegacyPass();
1731}
1732
1733PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1734 LoopAnalysisManager &AM,
1735 LoopStandardAnalysisResults &AR,
1736 LPMUpdater &U) {
1737 Function &F = *LN.getParent();
1738
1739 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1740 OptimizationRemarkEmitter ORE(&F);
1741 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
1742 return PreservedAnalyses::all();
1743 return getLoopPassPreservedAnalyses();
1744}