Bug Summary

File:build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Warning:line 1476, column 7
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LoopInterchange.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/build-llvm -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/llvm/lib/Transforms/Scalar -I include -I /build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-03-20-232535-108605-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220320100729+487629cc61b5/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Scalar/LoopInterchange.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Analysis/DependenceAnalysis.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/LoopNestAnalysis.h"
23#include "llvm/Analysis/LoopPass.h"
24#include "llvm/Analysis/OptimizationRemarkEmitter.h"
25#include "llvm/Analysis/ScalarEvolution.h"
26#include "llvm/Analysis/ScalarEvolutionExpressions.h"
27#include "llvm/IR/BasicBlock.h"
28#include "llvm/IR/Constants.h"
29#include "llvm/IR/DiagnosticInfo.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/IRBuilder.h"
33#include "llvm/IR/InstrTypes.h"
34#include "llvm/IR/Instruction.h"
35#include "llvm/IR/Instructions.h"
36#include "llvm/IR/User.h"
37#include "llvm/IR/Value.h"
38#include "llvm/InitializePasses.h"
39#include "llvm/Pass.h"
40#include "llvm/Support/Casting.h"
41#include "llvm/Support/CommandLine.h"
42#include "llvm/Support/Debug.h"
43#include "llvm/Support/ErrorHandling.h"
44#include "llvm/Support/raw_ostream.h"
45#include "llvm/Transforms/Scalar.h"
46#include "llvm/Transforms/Utils/BasicBlockUtils.h"
47#include "llvm/Transforms/Utils/LoopUtils.h"
48#include <cassert>
49#include <utility>
50#include <vector>
51
52using namespace llvm;
53
54#define DEBUG_TYPE"loop-interchange" "loop-interchange"
55
56STATISTIC(LoopsInterchanged, "Number of loops interchanged")static llvm::Statistic LoopsInterchanged = {"loop-interchange"
, "LoopsInterchanged", "Number of loops interchanged"}
;
57
58static cl::opt<int> LoopInterchangeCostThreshold(
59 "loop-interchange-threshold", cl::init(0), cl::Hidden,
60 cl::desc("Interchange if you gain more than this number"));
61
62namespace {
63
64using LoopVector = SmallVector<Loop *, 8>;
65
66// TODO: Check if we can use a sparse matrix here.
67using CharMatrix = std::vector<std::vector<char>>;
68
69} // end anonymous namespace
70
71// Maximum number of dependencies that can be handled in the dependency matrix.
72static const unsigned MaxMemInstrCount = 100;
73
74// Maximum loop depth supported.
75static const unsigned MaxLoopNestDepth = 10;
76
77#ifdef DUMP_DEP_MATRICIES
78static void printDepMatrix(CharMatrix &DepMatrix) {
79 for (auto &Row : DepMatrix) {
80 for (auto D : Row)
81 LLVM_DEBUG(dbgs() << D << " ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << D << " "; } } while
(false)
;
82 LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "\n"; } } while (false
)
;
83 }
84}
85#endif
86
87static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
88 Loop *L, DependenceInfo *DI) {
89 using ValueVector = SmallVector<Value *, 16>;
90
91 ValueVector MemInstr;
92
93 // For each block.
94 for (BasicBlock *BB : L->blocks()) {
95 // Scan the BB and collect legal loads and stores.
96 for (Instruction &I : *BB) {
97 if (!isa<Instruction>(I))
98 return false;
99 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
100 if (!Ld->isSimple())
101 return false;
102 MemInstr.push_back(&I);
103 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
104 if (!St->isSimple())
105 return false;
106 MemInstr.push_back(&I);
107 }
108 }
109 }
110
111 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
112 << " Loads and Stores to analyze\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
;
113
114 ValueVector::iterator I, IE, J, JE;
115
116 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
117 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
118 std::vector<char> Dep;
119 Instruction *Src = cast<Instruction>(*I);
120 Instruction *Dst = cast<Instruction>(*J);
121 // Ignore Input dependencies.
122 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
123 continue;
124 // Track Output, Flow, and Anti dependencies.
125 if (auto D = DI->depends(Src, Dst, true)) {
126 assert(D->isOrdered() && "Expected an output, flow or anti dep.")(static_cast <bool> (D->isOrdered() && "Expected an output, flow or anti dep."
) ? void (0) : __assert_fail ("D->isOrdered() && \"Expected an output, flow or anti dep.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 126, __extension__
__PRETTY_FUNCTION__))
;
127 LLVM_DEBUG(StringRef DepType =do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
128 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
129 dbgs() << "Found " << DepTypedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
130 << " dependency between Src and Dst\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
131 << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
;
132 unsigned Levels = D->getLevels();
133 char Direction;
134 for (unsigned II = 1; II <= Levels; ++II) {
135 const SCEV *Distance = D->getDistance(II);
136 const SCEVConstant *SCEVConst =
137 dyn_cast_or_null<SCEVConstant>(Distance);
138 if (SCEVConst) {
139 const ConstantInt *CI = SCEVConst->getValue();
140 if (CI->isNegative())
141 Direction = '<';
142 else if (CI->isZero())
143 Direction = '=';
144 else
145 Direction = '>';
146 Dep.push_back(Direction);
147 } else if (D->isScalar(II)) {
148 Direction = 'S';
149 Dep.push_back(Direction);
150 } else {
151 unsigned Dir = D->getDirection(II);
152 if (Dir == Dependence::DVEntry::LT ||
153 Dir == Dependence::DVEntry::LE)
154 Direction = '<';
155 else if (Dir == Dependence::DVEntry::GT ||
156 Dir == Dependence::DVEntry::GE)
157 Direction = '>';
158 else if (Dir == Dependence::DVEntry::EQ)
159 Direction = '=';
160 else
161 Direction = '*';
162 Dep.push_back(Direction);
163 }
164 }
165 while (Dep.size() != Level) {
166 Dep.push_back('I');
167 }
168
169 DepMatrix.push_back(Dep);
170 if (DepMatrix.size() > MaxMemInstrCount) {
171 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
172 << " dependencies inside loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
;
173 return false;
174 }
175 }
176 }
177 }
178
179 return true;
180}
181
182// A loop is moved from index 'from' to an index 'to'. Update the Dependence
183// matrix by exchanging the two columns.
184static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
185 unsigned ToIndx) {
186 for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
187 std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
188}
189
190// Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
191// '>'
192static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
193 unsigned Column) {
194 for (unsigned i = 0; i <= Column; ++i) {
195 if (DepMatrix[Row][i] == '<')
196 return false;
197 if (DepMatrix[Row][i] == '>')
198 return true;
199 }
200 // All dependencies were '=','S' or 'I'
201 return false;
202}
203
204// Checks if no dependence exist in the dependency matrix in Row before Column.
205static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
206 unsigned Column) {
207 for (unsigned i = 0; i < Column; ++i) {
208 if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
209 DepMatrix[Row][i] != 'I')
210 return false;
211 }
212 return true;
213}
214
215static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
216 unsigned OuterLoopId, char InnerDep,
217 char OuterDep) {
218 if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
219 return false;
220
221 if (InnerDep == OuterDep)
222 return true;
223
224 // It is legal to interchange if and only if after interchange no row has a
225 // '>' direction as the leftmost non-'='.
226
227 if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
228 return true;
229
230 if (InnerDep == '<')
231 return true;
232
233 if (InnerDep == '>') {
234 // If OuterLoopId represents outermost loop then interchanging will make the
235 // 1st dependency as '>'
236 if (OuterLoopId == 0)
237 return false;
238
239 // If all dependencies before OuterloopId are '=','S'or 'I'. Then
240 // interchanging will result in this row having an outermost non '='
241 // dependency of '>'
242 if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
243 return true;
244 }
245
246 return false;
247}
248
249// Checks if it is legal to interchange 2 loops.
250// [Theorem] A permutation of the loops in a perfect nest is legal if and only
251// if the direction matrix, after the same permutation is applied to its
252// columns, has no ">" direction as the leftmost non-"=" direction in any row.
253static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
254 unsigned InnerLoopId,
255 unsigned OuterLoopId) {
256 unsigned NumRows = DepMatrix.size();
257 // For each row check if it is valid to interchange.
258 for (unsigned Row = 0; Row < NumRows; ++Row) {
259 char InnerDep = DepMatrix[Row][InnerLoopId];
260 char OuterDep = DepMatrix[Row][OuterLoopId];
261 if (InnerDep == '*' || OuterDep == '*')
262 return false;
263 if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
264 return false;
265 }
266 return true;
267}
268
269static LoopVector populateWorklist(Loop &L) {
270 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
271 << L.getHeader()->getParent()->getName() << " Loop: %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
272 << L.getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
;
273 LoopVector LoopList;
274 Loop *CurrentLoop = &L;
275 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
276 while (!Vec->empty()) {
277 // The current loop has multiple subloops in it hence it is not tightly
278 // nested.
279 // Discard all loops above it added into Worklist.
280 if (Vec->size() != 1)
281 return {};
282
283 LoopList.push_back(CurrentLoop);
284 CurrentLoop = Vec->front();
285 Vec = &CurrentLoop->getSubLoops();
286 }
287 LoopList.push_back(CurrentLoop);
288 return LoopList;
289}
290
291namespace {
292
293/// LoopInterchangeLegality checks if it is legal to interchange the loop.
294class LoopInterchangeLegality {
295public:
296 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
297 OptimizationRemarkEmitter *ORE)
298 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
299
300 /// Check if the loops can be interchanged.
301 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
302 CharMatrix &DepMatrix);
303
304 /// Discover induction PHIs in the header of \p L. Induction
305 /// PHIs are added to \p Inductions.
306 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
307
308 /// Check if the loop structure is understood. We do not handle triangular
309 /// loops for now.
310 bool isLoopStructureUnderstood();
311
312 bool currentLimitations();
313
314 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
315 return OuterInnerReductions;
316 }
317
318 const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
319 return InnerLoopInductions;
320 }
321
322private:
323 bool tightlyNested(Loop *Outer, Loop *Inner);
324 bool containsUnsafeInstructions(BasicBlock *BB);
325
326 /// Discover induction and reduction PHIs in the header of \p L. Induction
327 /// PHIs are added to \p Inductions, reductions are added to
328 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
329 /// to be passed as \p InnerLoop.
330 bool findInductionAndReductions(Loop *L,
331 SmallVector<PHINode *, 8> &Inductions,
332 Loop *InnerLoop);
333
334 Loop *OuterLoop;
335 Loop *InnerLoop;
336
337 ScalarEvolution *SE;
338
339 /// Interface to emit optimization remarks.
340 OptimizationRemarkEmitter *ORE;
341
342 /// Set of reduction PHIs taking part of a reduction across the inner and
343 /// outer loop.
344 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
345
346 /// Set of inner loop induction PHIs
347 SmallVector<PHINode *, 8> InnerLoopInductions;
348};
349
350/// LoopInterchangeProfitability checks if it is profitable to interchange the
351/// loop.
352class LoopInterchangeProfitability {
353public:
354 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
355 OptimizationRemarkEmitter *ORE)
356 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
357
358 /// Check if the loop interchange is profitable.
359 bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
360 CharMatrix &DepMatrix);
361
362private:
363 int getInstrOrderCost();
364
365 Loop *OuterLoop;
366 Loop *InnerLoop;
367
368 /// Scev analysis.
369 ScalarEvolution *SE;
370
371 /// Interface to emit optimization remarks.
372 OptimizationRemarkEmitter *ORE;
373};
374
375/// LoopInterchangeTransform interchanges the loop.
376class LoopInterchangeTransform {
377public:
378 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
379 LoopInfo *LI, DominatorTree *DT,
380 const LoopInterchangeLegality &LIL)
381 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
382
383 /// Interchange OuterLoop and InnerLoop.
384 bool transform();
385 void restructureLoops(Loop *NewInner, Loop *NewOuter,
386 BasicBlock *OrigInnerPreHeader,
387 BasicBlock *OrigOuterPreHeader);
388 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
389
390private:
391 bool adjustLoopLinks();
392 bool adjustLoopBranches();
393
394 Loop *OuterLoop;
395 Loop *InnerLoop;
396
397 /// Scev analysis.
398 ScalarEvolution *SE;
399
400 LoopInfo *LI;
401 DominatorTree *DT;
402
403 const LoopInterchangeLegality &LIL;
404};
405
406struct LoopInterchange {
407 ScalarEvolution *SE = nullptr;
408 LoopInfo *LI = nullptr;
409 DependenceInfo *DI = nullptr;
410 DominatorTree *DT = nullptr;
411
412 /// Interface to emit optimization remarks.
413 OptimizationRemarkEmitter *ORE;
414
415 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
416 DominatorTree *DT, OptimizationRemarkEmitter *ORE)
417 : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
418
419 bool run(Loop *L) {
420 if (L->getParentLoop())
421 return false;
422
423 return processLoopList(populateWorklist(*L));
424 }
425
426 bool run(LoopNest &LN) {
427 const auto &LoopList = LN.getLoops();
428 for (unsigned I = 1; I < LoopList.size(); ++I)
429 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
430 return false;
431 return processLoopList(LoopList);
432 }
433
434 bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
435 for (Loop *L : LoopList) {
436 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
437 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
438 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Couldn't compute backedge count\n"
; } } while (false)
;
439 return false;
440 }
441 if (L->getNumBackEdges() != 1) {
442 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "NumBackEdges is not equal to 1\n"
; } } while (false)
;
443 return false;
444 }
445 if (!L->getExitingBlock()) {
446 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't have unique exit block\n"
; } } while (false)
;
447 return false;
448 }
449 }
450 return true;
451 }
452
453 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
454 // TODO: Add a better heuristic to select the loop to be interchanged based
455 // on the dependence matrix. Currently we select the innermost loop.
456 return LoopList.size() - 1;
457 }
458
459 bool processLoopList(ArrayRef<Loop *> LoopList) {
460 bool Changed = false;
461 unsigned LoopNestDepth = LoopList.size();
462 if (LoopNestDepth < 2) {
463 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't contain minimum nesting level.\n"
; } } while (false)
;
464 return false;
465 }
466 if (LoopNestDepth > MaxLoopNestDepth) {
467 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
468 << MaxLoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
;
469 return false;
470 }
471 if (!isComputableLoopNest(LoopList)) {
472 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not valid loop candidate for interchange\n"
; } } while (false)
;
473 return false;
474 }
475
476 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepthdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
477 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
;
478
479 CharMatrix DependencyMatrix;
480 Loop *OuterMostLoop = *(LoopList.begin());
481 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
482 OuterMostLoop, DI)) {
483 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Populating dependency matrix failed\n"
; } } while (false)
;
484 return false;
485 }
486#ifdef DUMP_DEP_MATRICIES
487 LLVM_DEBUG(dbgs() << "Dependence before interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence before interchange\n"
; } } while (false)
;
488 printDepMatrix(DependencyMatrix);
489#endif
490
491 // Get the Outermost loop exit.
492 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
493 if (!LoopNestExit) {
494 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "OuterMostLoop needs an unique exit block"
; } } while (false)
;
495 return false;
496 }
497
498 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
499 // Move the selected loop outwards to the best possible position.
500 Loop *LoopToBeInterchanged = LoopList[SelecLoopId];
501 for (unsigned i = SelecLoopId; i > 0; i--) {
502 bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i,
503 i - 1, DependencyMatrix);
504 if (!Interchanged)
505 return Changed;
506 // Update the DependencyMatrix
507 interChangeDependencies(DependencyMatrix, i, i - 1);
508#ifdef DUMP_DEP_MATRICIES
509 LLVM_DEBUG(dbgs() << "Dependence after interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence after interchange\n"
; } } while (false)
;
510 printDepMatrix(DependencyMatrix);
511#endif
512 Changed |= Interchanged;
513 }
514 return Changed;
515 }
516
517 bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
518 unsigned OuterLoopId,
519 std::vector<std::vector<char>> &DependencyMatrix) {
520 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
521 << " and OuterLoopId = " << OuterLoopId << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
;
522 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
523 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
524 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not interchanging loops. Cannot prove legality.\n"
; } } while (false)
;
525 return false;
526 }
527 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are legal to interchange\n"
; } } while (false)
;
528 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
529 if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
530 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Interchanging loops not profitable.\n"
; } } while (false)
;
531 return false;
532 }
533
534 ORE->emit([&]() {
535 return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged",
536 InnerLoop->getStartLoc(),
537 InnerLoop->getHeader())
538 << "Loop interchanged with enclosing loop.";
539 });
540
541 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
542 LIT.transform();
543 LLVM_DEBUG(dbgs() << "Loops interchanged.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops interchanged.\n"
; } } while (false)
;
544 LoopsInterchanged++;
545
546 assert(InnerLoop->isLCSSAForm(*DT) &&(static_cast <bool> (InnerLoop->isLCSSAForm(*DT) &&
"Inner loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("InnerLoop->isLCSSAForm(*DT) && \"Inner loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 547, __extension__
__PRETTY_FUNCTION__))
547 "Inner loop not left in LCSSA form after loop interchange!")(static_cast <bool> (InnerLoop->isLCSSAForm(*DT) &&
"Inner loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("InnerLoop->isLCSSAForm(*DT) && \"Inner loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 547, __extension__
__PRETTY_FUNCTION__))
;
548 assert(OuterLoop->isLCSSAForm(*DT) &&(static_cast <bool> (OuterLoop->isLCSSAForm(*DT) &&
"Outer loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("OuterLoop->isLCSSAForm(*DT) && \"Outer loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 549, __extension__
__PRETTY_FUNCTION__))
549 "Outer loop not left in LCSSA form after loop interchange!")(static_cast <bool> (OuterLoop->isLCSSAForm(*DT) &&
"Outer loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("OuterLoop->isLCSSAForm(*DT) && \"Outer loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 549, __extension__
__PRETTY_FUNCTION__))
;
550
551 return true;
552 }
553};
554
555} // end anonymous namespace
556
557bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
558 return any_of(*BB, [](const Instruction &I) {
559 return I.mayHaveSideEffects() || I.mayReadFromMemory();
560 });
561}
562
563bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
564 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
565 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
566 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
567
568 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking if loops are tightly nested\n"
; } } while (false)
;
569
570 // A perfectly nested loop will not have any branch in between the outer and
571 // inner block i.e. outer header will branch to either inner preheader and
572 // outerloop latch.
573 BranchInst *OuterLoopHeaderBI =
574 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
575 if (!OuterLoopHeaderBI)
576 return false;
577
578 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
579 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
580 Succ != OuterLoopLatch)
581 return false;
582
583 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking instructions in Loop header and Loop latch\n"
; } } while (false)
;
584 // We do not have any basic block in between now make sure the outer header
585 // and outer loop latch doesn't contain any unsafe instructions.
586 if (containsUnsafeInstructions(OuterLoopHeader) ||
587 containsUnsafeInstructions(OuterLoopLatch))
588 return false;
589
590 // Also make sure the inner loop preheader does not contain any unsafe
591 // instructions. Note that all instructions in the preheader will be moved to
592 // the outer loop header when interchanging.
593 if (InnerLoopPreHeader != OuterLoopHeader &&
594 containsUnsafeInstructions(InnerLoopPreHeader))
595 return false;
596
597 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
598 // Ensure the inner loop exit block flows to the outer loop latch possibly
599 // through empty blocks.
600 const BasicBlock &SuccInner =
601 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
602 if (&SuccInner != OuterLoopLatch) {
603 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExitdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
604 << " does not lead to the outer loop latch.\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
;
605 return false;
606 }
607 // The inner loop exit block does flow to the outer loop latch and not some
608 // other BBs, now make sure it contains safe instructions, since it will be
609 // moved into the (new) inner loop after interchange.
610 if (containsUnsafeInstructions(InnerLoopExit))
611 return false;
612
613 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are perfectly nested\n"
; } } while (false)
;
614 // We have a perfect loop nest.
615 return true;
616}
617
618bool LoopInterchangeLegality::isLoopStructureUnderstood() {
619 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
620 for (PHINode *InnerInduction : InnerLoopInductions) {
621 unsigned Num = InnerInduction->getNumOperands();
622 for (unsigned i = 0; i < Num; ++i) {
623 Value *Val = InnerInduction->getOperand(i);
624 if (isa<Constant>(Val))
625 continue;
626 Instruction *I = dyn_cast<Instruction>(Val);
627 if (!I)
628 return false;
629 // TODO: Handle triangular loops.
630 // e.g. for(int i=0;i<N;i++)
631 // for(int j=i;j<N;j++)
632 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
633 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
634 InnerLoopPreheader &&
635 !OuterLoop->isLoopInvariant(I)) {
636 return false;
637 }
638 }
639 }
640
641 // TODO: Handle triangular loops of another form.
642 // e.g. for(int i=0;i<N;i++)
643 // for(int j=0;j<i;j++)
644 // or,
645 // for(int i=0;i<N;i++)
646 // for(int j=0;j*i<N;j++)
647 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
648 BranchInst *InnerLoopLatchBI =
649 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
650 if (!InnerLoopLatchBI->isConditional())
651 return false;
652 if (CmpInst *InnerLoopCmp =
653 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
654 Value *Op0 = InnerLoopCmp->getOperand(0);
655 Value *Op1 = InnerLoopCmp->getOperand(1);
656
657 // LHS and RHS of the inner loop exit condition, e.g.,
658 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
659 Value *Left = nullptr;
660 Value *Right = nullptr;
661
662 // Check if V only involves inner loop induction variable.
663 // Return true if V is InnerInduction, or a cast from
664 // InnerInduction, or a binary operator that involves
665 // InnerInduction and a constant.
666 std::function<bool(Value *)> IsPathToInnerIndVar;
667 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
668 if (llvm::is_contained(InnerLoopInductions, V))
669 return true;
670 if (isa<Constant>(V))
671 return true;
672 const Instruction *I = dyn_cast<Instruction>(V);
673 if (!I)
674 return false;
675 if (isa<CastInst>(I))
676 return IsPathToInnerIndVar(I->getOperand(0));
677 if (isa<BinaryOperator>(I))
678 return IsPathToInnerIndVar(I->getOperand(0)) &&
679 IsPathToInnerIndVar(I->getOperand(1));
680 return false;
681 };
682
683 // In case of multiple inner loop indvars, it is okay if LHS and RHS
684 // are both inner indvar related variables.
685 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
686 return true;
687
688 // Otherwise we check if the cmp instruction compares an inner indvar
689 // related variable (Left) with a outer loop invariant (Right).
690 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
691 Left = Op0;
692 Right = Op1;
693 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
694 Left = Op1;
695 Right = Op0;
696 }
697
698 if (Left == nullptr)
699 return false;
700
701 const SCEV *S = SE->getSCEV(Right);
702 if (!SE->isLoopInvariant(S, OuterLoop))
703 return false;
704 }
705
706 return true;
707}
708
709// If SV is a LCSSA PHI node with a single incoming value, return the incoming
710// value.
711static Value *followLCSSA(Value *SV) {
712 PHINode *PHI = dyn_cast<PHINode>(SV);
713 if (!PHI)
714 return SV;
715
716 if (PHI->getNumIncomingValues() != 1)
717 return SV;
718 return followLCSSA(PHI->getIncomingValue(0));
719}
720
721// Check V's users to see if it is involved in a reduction in L.
722static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
723 // Reduction variables cannot be constants.
724 if (isa<Constant>(V))
725 return nullptr;
726
727 for (Value *User : V->users()) {
728 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
729 if (PHI->getNumIncomingValues() == 1)
730 continue;
731 RecurrenceDescriptor RD;
732 if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
733 // Detect floating point reduction only when it can be reordered.
734 if (RD.getExactFPMathInst() != nullptr)
735 return nullptr;
736 return PHI;
737 }
738 return nullptr;
739 }
740 }
741
742 return nullptr;
743}
744
745bool LoopInterchangeLegality::findInductionAndReductions(
746 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
747 if (!L->getLoopLatch() || !L->getLoopPredecessor())
748 return false;
749 for (PHINode &PHI : L->getHeader()->phis()) {
750 RecurrenceDescriptor RD;
751 InductionDescriptor ID;
752 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
753 Inductions.push_back(&PHI);
754 else {
755 // PHIs in inner loops need to be part of a reduction in the outer loop,
756 // discovered when checking the PHIs of the outer loop earlier.
757 if (!InnerLoop) {
758 if (!OuterInnerReductions.count(&PHI)) {
759 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
760 "across the outer loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
;
761 return false;
762 }
763 } else {
764 assert(PHI.getNumIncomingValues() == 2 &&(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 765, __extension__
__PRETTY_FUNCTION__))
765 "Phis in loop header should have exactly 2 incoming values")(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 765, __extension__
__PRETTY_FUNCTION__))
;
766 // Check if we have a PHI node in the outer loop that has a reduction
767 // result from the inner loop as an incoming value.
768 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
769 PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
770 if (!InnerRedPhi ||
771 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
772 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
773 dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
774 << "Failed to recognize PHI as an induction or reduction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
;
775 return false;
776 }
777 OuterInnerReductions.insert(&PHI);
778 OuterInnerReductions.insert(InnerRedPhi);
779 }
780 }
781 }
782 return true;
783}
784
785// This function indicates the current limitations in the transform as a result
786// of which we do not proceed.
787bool LoopInterchangeLegality::currentLimitations() {
788 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
789
790 // transform currently expects the loop latches to also be the exiting
791 // blocks.
792 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
793 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
794 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
795 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
796 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
797 dbgs() << "Loops where the latch is not the exiting block are not"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
798 << " supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
;
799 ORE->emit([&]() {
800 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ExitingNotLatch",
801 OuterLoop->getStartLoc(),
802 OuterLoop->getHeader())
803 << "Loops where the latch is not the exiting block cannot be"
804 " interchange currently.";
805 });
806 return true;
807 }
808
809 SmallVector<PHINode *, 8> Inductions;
810 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
811 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
812 dbgs() << "Only outer loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
813 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
814 ORE->emit([&]() {
815 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter",
816 OuterLoop->getStartLoc(),
817 OuterLoop->getHeader())
818 << "Only outer loops with induction or reduction PHI nodes can be"
819 " interchanged currently.";
820 });
821 return true;
822 }
823
824 Inductions.clear();
825 if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
826 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
827 dbgs() << "Only inner loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
828 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
829 ORE->emit([&]() {
830 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner",
831 InnerLoop->getStartLoc(),
832 InnerLoop->getHeader())
833 << "Only inner loops with induction or reduction PHI nodes can be"
834 " interchange currently.";
835 });
836 return true;
837 }
838
839 // TODO: Triangular loops are not handled for now.
840 if (!isLoopStructureUnderstood()) {
841 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop structure not understood by pass\n"
; } } while (false)
;
842 ORE->emit([&]() {
843 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner",
844 InnerLoop->getStartLoc(),
845 InnerLoop->getHeader())
846 << "Inner loop structure not understood currently.";
847 });
848 return true;
849 }
850
851 return false;
852}
853
854bool LoopInterchangeLegality::findInductions(
855 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
856 for (PHINode &PHI : L->getHeader()->phis()) {
857 InductionDescriptor ID;
858 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
859 Inductions.push_back(&PHI);
860 }
861 return !Inductions.empty();
862}
863
864// We currently only support LCSSA PHI nodes in the inner loop exit, if their
865// users are either reduction PHIs or PHIs outside the outer loop (which means
866// the we are only interested in the final value after the loop).
867static bool
868areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
869 SmallPtrSetImpl<PHINode *> &Reductions) {
870 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
871 for (PHINode &PHI : InnerExit->phis()) {
872 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
873 if (PHI.getNumIncomingValues() > 1)
874 return false;
875 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
876 PHINode *PN = dyn_cast<PHINode>(U);
877 return !PN ||
878 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
879 })) {
880 return false;
881 }
882 }
883 return true;
884}
885
886// We currently support LCSSA PHI nodes in the outer loop exit, if their
887// incoming values do not come from the outer loop latch or if the
888// outer loop latch has a single predecessor. In that case, the value will
889// be available if both the inner and outer loop conditions are true, which
890// will still be true after interchanging. If we have multiple predecessor,
891// that may not be the case, e.g. because the outer loop latch may be executed
892// if the inner loop is not executed.
893static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
894 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
895 for (PHINode &PHI : LoopNestExit->phis()) {
896 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
897 Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
898 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
899 continue;
900
901 // The incoming value is defined in the outer loop latch. Currently we
902 // only support that in case the outer loop latch has a single predecessor.
903 // This guarantees that the outer loop latch is executed if and only if
904 // the inner loop is executed (because tightlyNested() guarantees that the
905 // outer loop header only branches to the inner loop or the outer loop
906 // latch).
907 // FIXME: We could weaken this logic and allow multiple predecessors,
908 // if the values are produced outside the loop latch. We would need
909 // additional logic to update the PHI nodes in the exit block as
910 // well.
911 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
912 return false;
913 }
914 }
915 return true;
916}
917
918// In case of multi-level nested loops, it may occur that lcssa phis exist in
919// the latch of InnerLoop, i.e., when defs of the incoming values are further
920// inside the loopnest. Sometimes those incoming values are not available
921// after interchange, since the original inner latch will become the new outer
922// latch which may have predecessor paths that do not include those incoming
923// values.
924// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
925// multi-level loop nests.
926static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
927 if (InnerLoop->getSubLoops().empty())
928 return true;
929 // If the original outer latch has only one predecessor, then values defined
930 // further inside the looploop, e.g., in the innermost loop, will be available
931 // at the new outer latch after interchange.
932 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
933 return true;
934
935 // The outer latch has more than one predecessors, i.e., the inner
936 // exit and the inner header.
937 // PHI nodes in the inner latch are lcssa phis where the incoming values
938 // are defined further inside the loopnest. Check if those phis are used
939 // in the original inner latch. If that is the case then bail out since
940 // those incoming values may not be available at the new outer latch.
941 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
942 for (PHINode &PHI : InnerLoopLatch->phis()) {
943 for (auto *U : PHI.users()) {
944 Instruction *UI = cast<Instruction>(U);
945 if (InnerLoopLatch == UI->getParent())
946 return false;
947 }
948 }
949 return true;
950}
951
952bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
953 unsigned OuterLoopId,
954 CharMatrix &DepMatrix) {
955 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
956 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
957 << " and OuterLoopId = " << OuterLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
958 << " due to dependence\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
;
959 ORE->emit([&]() {
960 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence",
961 InnerLoop->getStartLoc(),
962 InnerLoop->getHeader())
963 << "Cannot interchange loops due to dependences.";
964 });
965 return false;
966 }
967 // Check if outer and inner loop contain legal instructions only.
968 for (auto *BB : OuterLoop->blocks())
969 for (Instruction &I : BB->instructionsWithoutDebug())
970 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
971 // readnone functions do not prevent interchanging.
972 if (CI->onlyWritesMemory())
973 continue;
974 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
975 dbgs() << "Loops with call instructions cannot be interchanged "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
976 << "safely.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
;
977 ORE->emit([&]() {
978 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "CallInst",
979 CI->getDebugLoc(),
980 CI->getParent())
981 << "Cannot interchange loops due to call instruction.";
982 });
983
984 return false;
985 }
986
987 if (!findInductions(InnerLoop, InnerLoopInductions)) {
988 LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cound not find inner loop induction variables.\n"
; } } while (false)
;
989 return false;
990 }
991
992 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
993 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"
; } } while (false)
;
994 ORE->emit([&]() {
995 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedInnerLatchPHI",
996 InnerLoop->getStartLoc(),
997 InnerLoop->getHeader())
998 << "Cannot interchange loops because unsupported PHI nodes found "
999 "in inner loop latch.";
1000 });
1001 return false;
1002 }
1003
1004 // TODO: The loops could not be interchanged due to current limitations in the
1005 // transform module.
1006 if (currentLimitations()) {
1007 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not legal because of current transform limitation\n"
; } } while (false)
;
1008 return false;
1009 }
1010
1011 // Check if the loops are tightly nested.
1012 if (!tightlyNested(OuterLoop, InnerLoop)) {
1013 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops not tightly nested\n"
; } } while (false)
;
1014 ORE->emit([&]() {
1015 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested",
1016 InnerLoop->getStartLoc(),
1017 InnerLoop->getHeader())
1018 << "Cannot interchange loops because they are not tightly "
1019 "nested.";
1020 });
1021 return false;
1022 }
1023
1024 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1025 OuterInnerReductions)) {
1026 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"
; } } while (false)
;
1027 ORE->emit([&]() {
1028 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1029 InnerLoop->getStartLoc(),
1030 InnerLoop->getHeader())
1031 << "Found unsupported PHI node in loop exit.";
1032 });
1033 return false;
1034 }
1035
1036 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1037 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"
; } } while (false)
;
1038 ORE->emit([&]() {
1039 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1040 OuterLoop->getStartLoc(),
1041 OuterLoop->getHeader())
1042 << "Found unsupported PHI node in loop exit.";
1043 });
1044 return false;
1045 }
1046
1047 return true;
1048}
1049
1050int LoopInterchangeProfitability::getInstrOrderCost() {
1051 unsigned GoodOrder, BadOrder;
1052 BadOrder = GoodOrder = 0;
1053 for (BasicBlock *BB : InnerLoop->blocks()) {
1054 for (Instruction &Ins : *BB) {
1055 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1056 unsigned NumOp = GEP->getNumOperands();
1057 bool FoundInnerInduction = false;
1058 bool FoundOuterInduction = false;
1059 for (unsigned i = 0; i < NumOp; ++i) {
1060 // Skip operands that are not SCEV-able.
1061 if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1062 continue;
1063
1064 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1065 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1066 if (!AR)
1067 continue;
1068
1069 // If we find the inner induction after an outer induction e.g.
1070 // for(int i=0;i<N;i++)
1071 // for(int j=0;j<N;j++)
1072 // A[i][j] = A[i-1][j-1]+k;
1073 // then it is a good order.
1074 if (AR->getLoop() == InnerLoop) {
1075 // We found an InnerLoop induction after OuterLoop induction. It is
1076 // a good order.
1077 FoundInnerInduction = true;
1078 if (FoundOuterInduction) {
1079 GoodOrder++;
1080 break;
1081 }
1082 }
1083 // If we find the outer induction after an inner induction e.g.
1084 // for(int i=0;i<N;i++)
1085 // for(int j=0;j<N;j++)
1086 // A[j][i] = A[j-1][i-1]+k;
1087 // then it is a bad order.
1088 if (AR->getLoop() == OuterLoop) {
1089 // We found an OuterLoop induction after InnerLoop induction. It is
1090 // a bad order.
1091 FoundOuterInduction = true;
1092 if (FoundInnerInduction) {
1093 BadOrder++;
1094 break;
1095 }
1096 }
1097 }
1098 }
1099 }
1100 }
1101 return GoodOrder - BadOrder;
1102}
1103
1104static bool isProfitableForVectorization(unsigned InnerLoopId,
1105 unsigned OuterLoopId,
1106 CharMatrix &DepMatrix) {
1107 // TODO: Improve this heuristic to catch more cases.
1108 // If the inner loop is loop independent or doesn't carry any dependency it is
1109 // profitable to move this to outer position.
1110 for (auto &Row : DepMatrix) {
1111 if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1112 return false;
1113 // TODO: We need to improve this heuristic.
1114 if (Row[OuterLoopId] != '=')
1115 return false;
1116 }
1117 // If outer loop has dependence and inner loop is loop independent then it is
1118 // profitable to interchange to enable parallelism.
1119 // If there are no dependences, interchanging will not improve anything.
1120 return !DepMatrix.empty();
1121}
1122
1123bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1124 unsigned OuterLoopId,
1125 CharMatrix &DepMatrix) {
1126 // TODO: Add better profitability checks.
1127 // e.g
1128 // 1) Construct dependency matrix and move the one with no loop carried dep
1129 // inside to enable vectorization.
1130
1131 // This is rough cost estimation algorithm. It counts the good and bad order
1132 // of induction variables in the instruction and allows reordering if number
1133 // of bad orders is more than good.
1134 int Cost = getInstrOrderCost();
1135 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cost = " << Cost
<< "\n"; } } while (false)
;
1136 if (Cost < -LoopInterchangeCostThreshold)
1137 return true;
1138
1139 // It is not profitable as per current cache profitability model. But check if
1140 // we can move this loop outside to improve parallelism.
1141 if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1142 return true;
1143
1144 ORE->emit([&]() {
1145 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable",
1146 InnerLoop->getStartLoc(),
1147 InnerLoop->getHeader())
1148 << "Interchanging loops is too costly (cost="
1149 << ore::NV("Cost", Cost) << ", threshold="
1150 << ore::NV("Threshold", LoopInterchangeCostThreshold)
1151 << ") and it does not improve parallelism.";
1152 });
1153 return false;
1154}
1155
1156void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1157 Loop *InnerLoop) {
1158 for (Loop *L : *OuterLoop)
1159 if (L == InnerLoop) {
1160 OuterLoop->removeChildLoop(L);
1161 return;
1162 }
1163 llvm_unreachable("Couldn't find loop")::llvm::llvm_unreachable_internal("Couldn't find loop", "llvm/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1163)
;
1164}
1165
1166/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1167/// new inner and outer loop after interchanging: NewInner is the original
1168/// outer loop and NewOuter is the original inner loop.
1169///
1170/// Before interchanging, we have the following structure
1171/// Outer preheader
1172// Outer header
1173// Inner preheader
1174// Inner header
1175// Inner body
1176// Inner latch
1177// outer bbs
1178// Outer latch
1179//
1180// After interchanging:
1181// Inner preheader
1182// Inner header
1183// Outer preheader
1184// Outer header
1185// Inner body
1186// outer bbs
1187// Outer latch
1188// Inner latch
1189void LoopInterchangeTransform::restructureLoops(
1190 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1191 BasicBlock *OrigOuterPreHeader) {
1192 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1193 // The original inner loop preheader moves from the new inner loop to
1194 // the parent loop, if there is one.
1195 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1196 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1197
1198 // Switch the loop levels.
1199 if (OuterLoopParent) {
1200 // Remove the loop from its parent loop.
1201 removeChildLoop(OuterLoopParent, NewInner);
1202 removeChildLoop(NewInner, NewOuter);
1203 OuterLoopParent->addChildLoop(NewOuter);
1204 } else {
1205 removeChildLoop(NewInner, NewOuter);
1206 LI->changeTopLevelLoop(NewInner, NewOuter);
1207 }
1208 while (!NewOuter->isInnermost())
1209 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1210 NewOuter->addChildLoop(NewInner);
1211
1212 // BBs from the original inner loop.
1213 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1214
1215 // Add BBs from the original outer loop to the original inner loop (excluding
1216 // BBs already in inner loop)
1217 for (BasicBlock *BB : NewInner->blocks())
1218 if (LI->getLoopFor(BB) == NewInner)
1219 NewOuter->addBlockEntry(BB);
1220
1221 // Now remove inner loop header and latch from the new inner loop and move
1222 // other BBs (the loop body) to the new inner loop.
1223 BasicBlock *OuterHeader = NewOuter->getHeader();
1224 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1225 for (BasicBlock *BB : OrigInnerBBs) {
1226 // Nothing will change for BBs in child loops.
1227 if (LI->getLoopFor(BB) != NewOuter)
1228 continue;
1229 // Remove the new outer loop header and latch from the new inner loop.
1230 if (BB == OuterHeader || BB == OuterLatch)
1231 NewInner->removeBlockFromLoop(BB);
1232 else
1233 LI->changeLoopFor(BB, NewInner);
1234 }
1235
1236 // The preheader of the original outer loop becomes part of the new
1237 // outer loop.
1238 NewOuter->addBlockEntry(OrigOuterPreHeader);
1239 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1240
1241 // Tell SE that we move the loops around.
1242 SE->forgetLoop(NewOuter);
1243 SE->forgetLoop(NewInner);
1244}
1245
1246bool LoopInterchangeTransform::transform() {
1247 bool Transformed = false;
1248
1249 if (InnerLoop->getSubLoops().empty()) {
1250 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1251 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Splitting the inner loop latch\n"
; } } while (false)
;
1252 auto &InductionPHIs = LIL.getInnerLoopInductions();
1253 if (InductionPHIs.empty()) {
1254 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to find the point to split loop latch \n"
; } } while (false)
;
1255 return false;
1256 }
1257
1258 SmallVector<Instruction *, 8> InnerIndexVarList;
1259 for (PHINode *CurInductionPHI : InductionPHIs) {
1260 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1261 InnerIndexVarList.push_back(
1262 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1263 else
1264 InnerIndexVarList.push_back(
1265 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1266 }
1267
1268 // Create a new latch block for the inner loop. We split at the
1269 // current latch's terminator and then move the condition and all
1270 // operands that are not either loop-invariant or the induction PHI into the
1271 // new latch block.
1272 BasicBlock *NewLatch =
1273 SplitBlock(InnerLoop->getLoopLatch(),
1274 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1275
1276 SmallSetVector<Instruction *, 4> WorkList;
1277 unsigned i = 0;
1278 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1279 for (; i < WorkList.size(); i++) {
1280 // Duplicate instruction and move it the new latch. Update uses that
1281 // have been moved.
1282 Instruction *NewI = WorkList[i]->clone();
1283 NewI->insertBefore(NewLatch->getFirstNonPHI());
1284 assert(!NewI->mayHaveSideEffects() &&(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1286, __extension__
__PRETTY_FUNCTION__))
1285 "Moving instructions with side-effects may change behavior of "(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1286, __extension__
__PRETTY_FUNCTION__))
1286 "the loop nest!")(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1286, __extension__
__PRETTY_FUNCTION__))
;
1287 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1288 Instruction *UserI = cast<Instruction>(U.getUser());
1289 if (!InnerLoop->contains(UserI->getParent()) ||
1290 UserI->getParent() == NewLatch ||
1291 llvm::is_contained(InductionPHIs, UserI))
1292 U.set(NewI);
1293 }
1294 // Add operands of moved instruction to the worklist, except if they are
1295 // outside the inner loop or are the induction PHI.
1296 for (Value *Op : WorkList[i]->operands()) {
1297 Instruction *OpI = dyn_cast<Instruction>(Op);
1298 if (!OpI ||
1299 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1300 llvm::is_contained(InductionPHIs, OpI))
1301 continue;
1302 WorkList.insert(OpI);
1303 }
1304 }
1305 };
1306
1307 // FIXME: Should we interchange when we have a constant condition?
1308 Instruction *CondI = dyn_cast<Instruction>(
1309 cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1310 ->getCondition());
1311 if (CondI)
1312 WorkList.insert(CondI);
1313 MoveInstructions();
1314 for (Instruction *InnerIndexVar : InnerIndexVarList)
1315 WorkList.insert(cast<Instruction>(InnerIndexVar));
1316 MoveInstructions();
1317
1318 // Splits the inner loops phi nodes out into a separate basic block.
1319 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1320 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1321 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "splitting InnerLoopHeader done\n"
; } } while (false)
;
1322 }
1323
1324 // Instructions in the original inner loop preheader may depend on values
1325 // defined in the outer loop header. Move them there, because the original
1326 // inner loop preheader will become the entry into the interchanged loop nest.
1327 // Currently we move all instructions and rely on LICM to move invariant
1328 // instructions outside the loop nest.
1329 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1330 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1331 if (InnerLoopPreHeader != OuterLoopHeader) {
1332 SmallPtrSet<Instruction *, 4> NeedsMoving;
1333 for (Instruction &I :
1334 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1335 std::prev(InnerLoopPreHeader->end()))))
1336 I.moveBefore(OuterLoopHeader->getTerminator());
1337 }
1338
1339 Transformed |= adjustLoopLinks();
1340 if (!Transformed) {
1341 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopLinks failed\n"
; } } while (false)
;
1342 return false;
1343 }
1344
1345 return true;
1346}
1347
1348/// \brief Move all instructions except the terminator from FromBB right before
1349/// InsertBefore
1350static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1351 auto &ToList = InsertBefore->getParent()->getInstList();
1352 auto &FromList = FromBB->getInstList();
1353
1354 ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1355 FromBB->getTerminator()->getIterator());
1356}
1357
1358/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1359static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1360 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1361 // from BB1 afterwards.
1362 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1363 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1364 for (Instruction *I : TempInstrs)
1365 I->removeFromParent();
1366
1367 // Move instructions from BB2 to BB1.
1368 moveBBContents(BB2, BB1->getTerminator());
1369
1370 // Move instructions from TempInstrs to BB2.
1371 for (Instruction *I : TempInstrs)
1372 I->insertBefore(BB2->getTerminator());
1373}
1374
1375// Update BI to jump to NewBB instead of OldBB. Records updates to the
1376// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1377// \p OldBB is exactly once in BI's successor list.
1378static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1379 BasicBlock *NewBB,
1380 std::vector<DominatorTree::UpdateType> &DTUpdates,
1381 bool MustUpdateOnce = true) {
1382 assert((!MustUpdateOnce ||(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1386, __extension__
__PRETTY_FUNCTION__))
1383 llvm::count_if(successors(BI),(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1386, __extension__
__PRETTY_FUNCTION__))
1384 [OldBB](BasicBlock *BB) {(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1386, __extension__
__PRETTY_FUNCTION__))
1385 return BB == OldBB;(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1386, __extension__
__PRETTY_FUNCTION__))
1386 }) == 1) && "BI must jump to OldBB exactly once.")(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1386, __extension__
__PRETTY_FUNCTION__))
;
1387 bool Changed = false;
1388 for (Use &Op : BI->operands())
1389 if (Op == OldBB) {
1390 Op.set(NewBB);
1391 Changed = true;
1392 }
1393
1394 if (Changed) {
1395 DTUpdates.push_back(
1396 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1397 DTUpdates.push_back(
1398 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1399 }
1400 assert(Changed && "Expected a successor to be updated")(static_cast <bool> (Changed && "Expected a successor to be updated"
) ? void (0) : __assert_fail ("Changed && \"Expected a successor to be updated\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1400, __extension__
__PRETTY_FUNCTION__))
;
1401}
1402
1403// Move Lcssa PHIs to the right place.
1404static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1405 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1406 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1407 Loop *InnerLoop, LoopInfo *LI) {
1408
1409 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1410 // defined either in the header or latch. Those blocks will become header and
1411 // latch of the new outer loop, and the only possible users can PHI nodes
1412 // in the exit block of the loop nest or the outer loop header (reduction
1413 // PHIs, in that case, the incoming value must be defined in the inner loop
1414 // header). We can just substitute the user with the incoming value and remove
1415 // the PHI.
1416 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1417 assert(P.getNumIncomingValues() == 1 &&(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1418, __extension__
__PRETTY_FUNCTION__))
1418 "Only loops with a single exit are supported!")(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1418, __extension__
__PRETTY_FUNCTION__))
;
1419
1420 // Incoming values are guaranteed be instructions currently.
1421 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1422 // Skip phis with incoming values from the inner loop body, excluding the
1423 // header and latch.
1424 if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
1425 continue;
1426
1427 assert(all_of(P.users(),(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1435, __extension__
__PRETTY_FUNCTION__))
1428 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1435, __extension__
__PRETTY_FUNCTION__))
1429 return (cast<PHINode>(U)->getParent() == OuterHeader &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1435, __extension__
__PRETTY_FUNCTION__))
1430 IncI->getParent() == InnerHeader) ||(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1435, __extension__
__PRETTY_FUNCTION__))
1431 cast<PHINode>(U)->getParent() == OuterExit;(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1435, __extension__
__PRETTY_FUNCTION__))
1432 }) &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1435, __extension__
__PRETTY_FUNCTION__))
1433 "Can only replace phis iff the uses are in the loop nest exit or "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1435, __extension__
__PRETTY_FUNCTION__))
1434 "the incoming value is defined in the inner header (it will "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1435, __extension__
__PRETTY_FUNCTION__))
1435 "dominate all loop blocks after interchanging)")(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1435, __extension__
__PRETTY_FUNCTION__))
;
1436 P.replaceAllUsesWith(IncI);
1437 P.eraseFromParent();
1438 }
1439
1440 SmallVector<PHINode *, 8> LcssaInnerExit;
1441 for (PHINode &P : InnerExit->phis())
1442 LcssaInnerExit.push_back(&P);
1443
1444 SmallVector<PHINode *, 8> LcssaInnerLatch;
1445 for (PHINode &P : InnerLatch->phis())
1446 LcssaInnerLatch.push_back(&P);
1447
1448 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1449 // If a PHI node has users outside of InnerExit, it has a use outside the
1450 // interchanged loop and we have to preserve it. We move these to
1451 // InnerLatch, which will become the new exit block for the innermost
1452 // loop after interchanging.
1453 for (PHINode *P : LcssaInnerExit)
1
Assuming '__begin1' is equal to '__end1'
1454 P->moveBefore(InnerLatch->getFirstNonPHI());
1455
1456 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1457 // and we have to move them to the new inner latch.
1458 for (PHINode *P : LcssaInnerLatch)
2
Assuming '__begin1' is equal to '__end1'
1459 P->moveBefore(InnerExit->getFirstNonPHI());
1460
1461 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1462 // incoming values defined in the outer loop, we have to add a new PHI
1463 // in the inner loop latch, which became the exit block of the outer loop,
1464 // after interchanging.
1465 if (OuterExit) {
3
Assuming 'OuterExit' is non-null
4
Taking true branch
1466 for (PHINode &P : OuterExit->phis()) {
1467 if (P.getNumIncomingValues() != 1)
5
Assuming the condition is false
6
Taking false branch
1468 continue;
1469 // Skip Phis with incoming values defined in the inner loop. Those should
1470 // already have been updated.
1471 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1472 if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
7
Assuming 'I' is non-null
8
Assuming the condition is false
9
Taking false branch
1473 continue;
1474
1475 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
10
Assuming the object is not a 'PHINode'
11
'NewPhi' initialized to a null pointer value
1476 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
12
Called C++ object pointer is null
1477 NewPhi->setIncomingBlock(0, OuterLatch);
1478 // We might have incoming edges from other BBs, i.e., the original outer
1479 // header.
1480 for (auto *Pred : predecessors(InnerLatch)) {
1481 if (Pred == OuterLatch)
1482 continue;
1483 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1484 }
1485 NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1486 P.setIncomingValue(0, NewPhi);
1487 }
1488 }
1489
1490 // Now adjust the incoming blocks for the LCSSA PHIs.
1491 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1492 // with the new latch.
1493 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1494}
1495
1496bool LoopInterchangeTransform::adjustLoopBranches() {
1497 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopBranches called\n"
; } } while (false)
;
1498 std::vector<DominatorTree::UpdateType> DTUpdates;
1499
1500 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1501 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1502
1503 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1505, __extension__
__PRETTY_FUNCTION__))
1504 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1505, __extension__
__PRETTY_FUNCTION__))
1505 InnerLoopPreHeader && "Guaranteed by loop-simplify form")(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1505, __extension__
__PRETTY_FUNCTION__))
;
1506 // Ensure that both preheaders do not contain PHI nodes and have single
1507 // predecessors. This allows us to move them easily. We use
1508 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1509 // preheaders do not satisfy those conditions.
1510 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1511 !OuterLoopPreHeader->getUniquePredecessor())
1512 OuterLoopPreHeader =
1513 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1514 if (InnerLoopPreHeader == OuterLoop->getHeader())
1515 InnerLoopPreHeader =
1516 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1517
1518 // Adjust the loop preheader
1519 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1520 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1521 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1522 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1523 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1524 BasicBlock *InnerLoopLatchPredecessor =
1525 InnerLoopLatch->getUniquePredecessor();
1526 BasicBlock *InnerLoopLatchSuccessor;
1527 BasicBlock *OuterLoopLatchSuccessor;
1528
1529 BranchInst *OuterLoopLatchBI =
1530 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1531 BranchInst *InnerLoopLatchBI =
1532 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1533 BranchInst *OuterLoopHeaderBI =
1534 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1535 BranchInst *InnerLoopHeaderBI =
1536 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1537
1538 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1539 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1540 !InnerLoopHeaderBI)
1541 return false;
1542
1543 BranchInst *InnerLoopLatchPredecessorBI =
1544 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1545 BranchInst *OuterLoopPredecessorBI =
1546 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1547
1548 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1549 return false;
1550 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1551 if (!InnerLoopHeaderSuccessor)
1552 return false;
1553
1554 // Adjust Loop Preheader and headers.
1555 // The branches in the outer loop predecessor and the outer loop header can
1556 // be unconditional branches or conditional branches with duplicates. Consider
1557 // this when updating the successors.
1558 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1559 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1560 // The outer loop header might or might not branch to the outer latch.
1561 // We are guaranteed to branch to the inner loop preheader.
1562 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1563 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1564 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1565 DTUpdates,
1566 /*MustUpdateOnce=*/false);
1567 }
1568 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1569 InnerLoopHeaderSuccessor, DTUpdates,
1570 /*MustUpdateOnce=*/false);
1571
1572 // Adjust reduction PHI's now that the incoming block has changed.
1573 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1574 OuterLoopHeader);
1575
1576 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1577 OuterLoopPreHeader, DTUpdates);
1578
1579 // -------------Adjust loop latches-----------
1580 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1581 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1582 else
1583 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1584
1585 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1586 InnerLoopLatchSuccessor, DTUpdates);
1587
1588 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1589 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1590 else
1591 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1592
1593 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1594 OuterLoopLatchSuccessor, DTUpdates);
1595 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1596 DTUpdates);
1597
1598 DT->applyUpdates(DTUpdates);
1599 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1600 OuterLoopPreHeader);
1601
1602 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1603 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1604 InnerLoop, LI);
1605 // For PHIs in the exit block of the outer loop, outer's latch has been
1606 // replaced by Inners'.
1607 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1608
1609 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1610 // Now update the reduction PHIs in the inner and outer loop headers.
1611 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1612 for (PHINode &PHI : InnerLoopHeader->phis())
1613 if (OuterInnerReductions.contains(&PHI))
1614 InnerLoopPHIs.push_back(&PHI);
1615
1616 for (PHINode &PHI : OuterLoopHeader->phis())
1617 if (OuterInnerReductions.contains(&PHI))
1618 OuterLoopPHIs.push_back(&PHI);
1619
1620 // Now move the remaining reduction PHIs from outer to inner loop header and
1621 // vice versa. The PHI nodes must be part of a reduction across the inner and
1622 // outer loop and all the remains to do is and updating the incoming blocks.
1623 for (PHINode *PHI : OuterLoopPHIs) {
1624 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Outer loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1625 PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1626 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1626, __extension__
__PRETTY_FUNCTION__))
;
1627 }
1628 for (PHINode *PHI : InnerLoopPHIs) {
1629 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1630 PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1631 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1631, __extension__
__PRETTY_FUNCTION__))
;
1632 }
1633
1634 // Update the incoming blocks for moved PHI nodes.
1635 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1636 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1637 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1638 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1639
1640 // Values defined in the outer loop header could be used in the inner loop
1641 // latch. In that case, we need to create LCSSA phis for them, because after
1642 // interchanging they will be defined in the new inner loop and used in the
1643 // new outer loop.
1644 IRBuilder<> Builder(OuterLoopHeader->getContext());
1645 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1646 for (Instruction &I :
1647 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1648 MayNeedLCSSAPhis.push_back(&I);
1649 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1650
1651 return true;
1652}
1653
1654bool LoopInterchangeTransform::adjustLoopLinks() {
1655 // Adjust all branches in the inner and outer loop.
1656 bool Changed = adjustLoopBranches();
1657 if (Changed) {
1658 // We have interchanged the preheaders so we need to interchange the data in
1659 // the preheaders as well. This is because the content of the inner
1660 // preheader was previously executed inside the outer loop.
1661 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1662 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1663 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1664 }
1665 return Changed;
1666}
1667
1668namespace {
1669/// Main LoopInterchange Pass.
1670struct LoopInterchangeLegacyPass : public LoopPass {
1671 static char ID;
1672
1673 LoopInterchangeLegacyPass() : LoopPass(ID) {
1674 initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
1675 }
1676
1677 void getAnalysisUsage(AnalysisUsage &AU) const override {
1678 AU.addRequired<DependenceAnalysisWrapperPass>();
1679 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1680
1681 getLoopAnalysisUsage(AU);
1682 }
1683
1684 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1685 if (skipLoop(L))
1686 return false;
1687
1688 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1689 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1690 auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1691 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1692 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1693
1694 return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
1695 }
1696};
1697} // namespace
1698
1699char LoopInterchangeLegacyPass::ID = 0;
1700
1701INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1702 "Interchanges loops for cache reuse", false, false)static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1703INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
1704INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)initializeDependenceAnalysisWrapperPassPass(Registry);
1705INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)initializeOptimizationRemarkEmitterWrapperPassPass(Registry);
1706
1707INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1708 "Interchanges loops for cache reuse", false, false)PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1709
1710Pass *llvm::createLoopInterchangePass() {
1711 return new LoopInterchangeLegacyPass();
1712}
1713
1714PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1715 LoopAnalysisManager &AM,
1716 LoopStandardAnalysisResults &AR,
1717 LPMUpdater &U) {
1718 Function &F = *LN.getParent();
1719
1720 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1721 OptimizationRemarkEmitter ORE(&F);
1722 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
1723 return PreservedAnalyses::all();
1724 return getLoopPassPreservedAnalyses();
1725}