Bug Summary

File:llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Warning:line 669, column 8
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LoopInterchange.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220114111422+ed30a968b5d6/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-14~++20220114111422+ed30a968b5d6/llvm/lib/Transforms/Scalar -I include -I /build/llvm-toolchain-snapshot-14~++20220114111422+ed30a968b5d6/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220114111422+ed30a968b5d6/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220114111422+ed30a968b5d6/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220114111422+ed30a968b5d6/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220114111422+ed30a968b5d6/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220114111422+ed30a968b5d6/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220114111422+ed30a968b5d6/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220114111422+ed30a968b5d6/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-14-124155-33152-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220114111422+ed30a968b5d6/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Scalar/LoopInterchange.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Analysis/DependenceAnalysis.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/LoopNestAnalysis.h"
23#include "llvm/Analysis/LoopPass.h"
24#include "llvm/Analysis/OptimizationRemarkEmitter.h"
25#include "llvm/Analysis/ScalarEvolution.h"
26#include "llvm/Analysis/ScalarEvolutionExpressions.h"
27#include "llvm/IR/BasicBlock.h"
28#include "llvm/IR/Constants.h"
29#include "llvm/IR/DiagnosticInfo.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/IRBuilder.h"
33#include "llvm/IR/InstrTypes.h"
34#include "llvm/IR/Instruction.h"
35#include "llvm/IR/Instructions.h"
36#include "llvm/IR/Type.h"
37#include "llvm/IR/User.h"
38#include "llvm/IR/Value.h"
39#include "llvm/InitializePasses.h"
40#include "llvm/Pass.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/raw_ostream.h"
46#include "llvm/Transforms/Scalar.h"
47#include "llvm/Transforms/Utils.h"
48#include "llvm/Transforms/Utils/BasicBlockUtils.h"
49#include "llvm/Transforms/Utils/LoopUtils.h"
50#include <cassert>
51#include <utility>
52#include <vector>
53
54using namespace llvm;
55
56#define DEBUG_TYPE"loop-interchange" "loop-interchange"
57
58STATISTIC(LoopsInterchanged, "Number of loops interchanged")static llvm::Statistic LoopsInterchanged = {"loop-interchange"
, "LoopsInterchanged", "Number of loops interchanged"}
;
59
60static cl::opt<int> LoopInterchangeCostThreshold(
61 "loop-interchange-threshold", cl::init(0), cl::Hidden,
62 cl::desc("Interchange if you gain more than this number"));
63
64namespace {
65
66using LoopVector = SmallVector<Loop *, 8>;
67
68// TODO: Check if we can use a sparse matrix here.
69using CharMatrix = std::vector<std::vector<char>>;
70
71} // end anonymous namespace
72
73// Maximum number of dependencies that can be handled in the dependency matrix.
74static const unsigned MaxMemInstrCount = 100;
75
76// Maximum loop depth supported.
77static const unsigned MaxLoopNestDepth = 10;
78
79#ifdef DUMP_DEP_MATRICIES
80static void printDepMatrix(CharMatrix &DepMatrix) {
81 for (auto &Row : DepMatrix) {
82 for (auto D : Row)
83 LLVM_DEBUG(dbgs() << D << " ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << D << " "; } } while
(false)
;
84 LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "\n"; } } while (false
)
;
85 }
86}
87#endif
88
89static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
90 Loop *L, DependenceInfo *DI) {
91 using ValueVector = SmallVector<Value *, 16>;
92
93 ValueVector MemInstr;
94
95 // For each block.
96 for (BasicBlock *BB : L->blocks()) {
97 // Scan the BB and collect legal loads and stores.
98 for (Instruction &I : *BB) {
99 if (!isa<Instruction>(I))
100 return false;
101 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
102 if (!Ld->isSimple())
103 return false;
104 MemInstr.push_back(&I);
105 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
106 if (!St->isSimple())
107 return false;
108 MemInstr.push_back(&I);
109 }
110 }
111 }
112
113 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
114 << " Loads and Stores to analyze\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
;
115
116 ValueVector::iterator I, IE, J, JE;
117
118 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
119 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
120 std::vector<char> Dep;
121 Instruction *Src = cast<Instruction>(*I);
122 Instruction *Dst = cast<Instruction>(*J);
123 if (Src == Dst)
124 continue;
125 // Ignore Input dependencies.
126 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
127 continue;
128 // Track Output, Flow, and Anti dependencies.
129 if (auto D = DI->depends(Src, Dst, true)) {
130 assert(D->isOrdered() && "Expected an output, flow or anti dep.")(static_cast <bool> (D->isOrdered() && "Expected an output, flow or anti dep."
) ? void (0) : __assert_fail ("D->isOrdered() && \"Expected an output, flow or anti dep.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 130, __extension__
__PRETTY_FUNCTION__))
;
131 LLVM_DEBUG(StringRef DepType =do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
132 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
133 dbgs() << "Found " << DepTypedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
134 << " dependency between Src and Dst\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
135 << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
;
136 unsigned Levels = D->getLevels();
137 char Direction;
138 for (unsigned II = 1; II <= Levels; ++II) {
139 const SCEV *Distance = D->getDistance(II);
140 const SCEVConstant *SCEVConst =
141 dyn_cast_or_null<SCEVConstant>(Distance);
142 if (SCEVConst) {
143 const ConstantInt *CI = SCEVConst->getValue();
144 if (CI->isNegative())
145 Direction = '<';
146 else if (CI->isZero())
147 Direction = '=';
148 else
149 Direction = '>';
150 Dep.push_back(Direction);
151 } else if (D->isScalar(II)) {
152 Direction = 'S';
153 Dep.push_back(Direction);
154 } else {
155 unsigned Dir = D->getDirection(II);
156 if (Dir == Dependence::DVEntry::LT ||
157 Dir == Dependence::DVEntry::LE)
158 Direction = '<';
159 else if (Dir == Dependence::DVEntry::GT ||
160 Dir == Dependence::DVEntry::GE)
161 Direction = '>';
162 else if (Dir == Dependence::DVEntry::EQ)
163 Direction = '=';
164 else
165 Direction = '*';
166 Dep.push_back(Direction);
167 }
168 }
169 while (Dep.size() != Level) {
170 Dep.push_back('I');
171 }
172
173 DepMatrix.push_back(Dep);
174 if (DepMatrix.size() > MaxMemInstrCount) {
175 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
176 << " dependencies inside loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
;
177 return false;
178 }
179 }
180 }
181 }
182
183 return true;
184}
185
186// A loop is moved from index 'from' to an index 'to'. Update the Dependence
187// matrix by exchanging the two columns.
188static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
189 unsigned ToIndx) {
190 for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
191 std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
192}
193
194// Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
195// '>'
196static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
197 unsigned Column) {
198 for (unsigned i = 0; i <= Column; ++i) {
199 if (DepMatrix[Row][i] == '<')
200 return false;
201 if (DepMatrix[Row][i] == '>')
202 return true;
203 }
204 // All dependencies were '=','S' or 'I'
205 return false;
206}
207
208// Checks if no dependence exist in the dependency matrix in Row before Column.
209static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
210 unsigned Column) {
211 for (unsigned i = 0; i < Column; ++i) {
212 if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
213 DepMatrix[Row][i] != 'I')
214 return false;
215 }
216 return true;
217}
218
219static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
220 unsigned OuterLoopId, char InnerDep,
221 char OuterDep) {
222 if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
223 return false;
224
225 if (InnerDep == OuterDep)
226 return true;
227
228 // It is legal to interchange if and only if after interchange no row has a
229 // '>' direction as the leftmost non-'='.
230
231 if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
232 return true;
233
234 if (InnerDep == '<')
235 return true;
236
237 if (InnerDep == '>') {
238 // If OuterLoopId represents outermost loop then interchanging will make the
239 // 1st dependency as '>'
240 if (OuterLoopId == 0)
241 return false;
242
243 // If all dependencies before OuterloopId are '=','S'or 'I'. Then
244 // interchanging will result in this row having an outermost non '='
245 // dependency of '>'
246 if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
247 return true;
248 }
249
250 return false;
251}
252
253// Checks if it is legal to interchange 2 loops.
254// [Theorem] A permutation of the loops in a perfect nest is legal if and only
255// if the direction matrix, after the same permutation is applied to its
256// columns, has no ">" direction as the leftmost non-"=" direction in any row.
257static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
258 unsigned InnerLoopId,
259 unsigned OuterLoopId) {
260 unsigned NumRows = DepMatrix.size();
261 // For each row check if it is valid to interchange.
262 for (unsigned Row = 0; Row < NumRows; ++Row) {
263 char InnerDep = DepMatrix[Row][InnerLoopId];
264 char OuterDep = DepMatrix[Row][OuterLoopId];
265 if (InnerDep == '*' || OuterDep == '*')
266 return false;
267 if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
268 return false;
269 }
270 return true;
271}
272
273static LoopVector populateWorklist(Loop &L) {
274 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
275 << L.getHeader()->getParent()->getName() << " Loop: %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
276 << L.getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
;
277 LoopVector LoopList;
278 Loop *CurrentLoop = &L;
279 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
280 while (!Vec->empty()) {
281 // The current loop has multiple subloops in it hence it is not tightly
282 // nested.
283 // Discard all loops above it added into Worklist.
284 if (Vec->size() != 1)
285 return {};
286
287 LoopList.push_back(CurrentLoop);
288 CurrentLoop = Vec->front();
289 Vec = &CurrentLoop->getSubLoops();
290 }
291 LoopList.push_back(CurrentLoop);
292 return LoopList;
293}
294
295static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
296 PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
297 if (InnerIndexVar)
298 return InnerIndexVar;
299 if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
300 return nullptr;
301 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
302 PHINode *PhiVar = cast<PHINode>(I);
303 Type *PhiTy = PhiVar->getType();
304 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
305 !PhiTy->isPointerTy())
306 return nullptr;
307 const SCEVAddRecExpr *AddRec =
308 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
309 if (!AddRec || !AddRec->isAffine())
310 continue;
311 const SCEV *Step = AddRec->getStepRecurrence(*SE);
312 if (!isa<SCEVConstant>(Step))
313 continue;
314 // Found the induction variable.
315 // FIXME: Handle loops with more than one induction variable. Note that,
316 // currently, legality makes sure we have only one induction variable.
317 return PhiVar;
318 }
319 return nullptr;
320}
321
322namespace {
323
324/// LoopInterchangeLegality checks if it is legal to interchange the loop.
325class LoopInterchangeLegality {
326public:
327 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
328 OptimizationRemarkEmitter *ORE)
329 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
330
331 /// Check if the loops can be interchanged.
332 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
333 CharMatrix &DepMatrix);
334
335 /// Check if the loop structure is understood. We do not handle triangular
336 /// loops for now.
337 bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
338
339 bool currentLimitations();
340
341 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
342 return OuterInnerReductions;
343 }
344
345private:
346 bool tightlyNested(Loop *Outer, Loop *Inner);
347 bool containsUnsafeInstructions(BasicBlock *BB);
348
349 /// Discover induction and reduction PHIs in the header of \p L. Induction
350 /// PHIs are added to \p Inductions, reductions are added to
351 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
352 /// to be passed as \p InnerLoop.
353 bool findInductionAndReductions(Loop *L,
354 SmallVector<PHINode *, 8> &Inductions,
355 Loop *InnerLoop);
356
357 Loop *OuterLoop;
358 Loop *InnerLoop;
359
360 ScalarEvolution *SE;
361
362 /// Interface to emit optimization remarks.
363 OptimizationRemarkEmitter *ORE;
364
365 /// Set of reduction PHIs taking part of a reduction across the inner and
366 /// outer loop.
367 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
368};
369
370/// LoopInterchangeProfitability checks if it is profitable to interchange the
371/// loop.
372class LoopInterchangeProfitability {
373public:
374 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
375 OptimizationRemarkEmitter *ORE)
376 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
377
378 /// Check if the loop interchange is profitable.
379 bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
380 CharMatrix &DepMatrix);
381
382private:
383 int getInstrOrderCost();
384
385 Loop *OuterLoop;
386 Loop *InnerLoop;
387
388 /// Scev analysis.
389 ScalarEvolution *SE;
390
391 /// Interface to emit optimization remarks.
392 OptimizationRemarkEmitter *ORE;
393};
394
395/// LoopInterchangeTransform interchanges the loop.
396class LoopInterchangeTransform {
397public:
398 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
399 LoopInfo *LI, DominatorTree *DT,
400 const LoopInterchangeLegality &LIL)
401 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
402
403 /// Interchange OuterLoop and InnerLoop.
404 bool transform();
405 void restructureLoops(Loop *NewInner, Loop *NewOuter,
406 BasicBlock *OrigInnerPreHeader,
407 BasicBlock *OrigOuterPreHeader);
408 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
409
410private:
411 bool adjustLoopLinks();
412 bool adjustLoopBranches();
413
414 Loop *OuterLoop;
415 Loop *InnerLoop;
416
417 /// Scev analysis.
418 ScalarEvolution *SE;
419
420 LoopInfo *LI;
421 DominatorTree *DT;
422
423 const LoopInterchangeLegality &LIL;
424};
425
426struct LoopInterchange {
427 ScalarEvolution *SE = nullptr;
428 LoopInfo *LI = nullptr;
429 DependenceInfo *DI = nullptr;
430 DominatorTree *DT = nullptr;
431
432 /// Interface to emit optimization remarks.
433 OptimizationRemarkEmitter *ORE;
434
435 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
436 DominatorTree *DT, OptimizationRemarkEmitter *ORE)
437 : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
438
439 bool run(Loop *L) {
440 if (L->getParentLoop())
441 return false;
442
443 return processLoopList(populateWorklist(*L));
444 }
445
446 bool run(LoopNest &LN) {
447 const auto &LoopList = LN.getLoops();
448 for (unsigned I = 1; I < LoopList.size(); ++I)
449 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
450 return false;
451 return processLoopList(LoopList);
452 }
453
454 bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
455 for (Loop *L : LoopList) {
456 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
457 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
458 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Couldn't compute backedge count\n"
; } } while (false)
;
459 return false;
460 }
461 if (L->getNumBackEdges() != 1) {
462 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "NumBackEdges is not equal to 1\n"
; } } while (false)
;
463 return false;
464 }
465 if (!L->getExitingBlock()) {
466 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't have unique exit block\n"
; } } while (false)
;
467 return false;
468 }
469 }
470 return true;
471 }
472
473 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
474 // TODO: Add a better heuristic to select the loop to be interchanged based
475 // on the dependence matrix. Currently we select the innermost loop.
476 return LoopList.size() - 1;
477 }
478
479 bool processLoopList(ArrayRef<Loop *> LoopList) {
480 bool Changed = false;
481 unsigned LoopNestDepth = LoopList.size();
482 if (LoopNestDepth < 2) {
483 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't contain minimum nesting level.\n"
; } } while (false)
;
484 return false;
485 }
486 if (LoopNestDepth > MaxLoopNestDepth) {
487 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
488 << MaxLoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
;
489 return false;
490 }
491 if (!isComputableLoopNest(LoopList)) {
492 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not valid loop candidate for interchange\n"
; } } while (false)
;
493 return false;
494 }
495
496 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepthdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
497 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
;
498
499 CharMatrix DependencyMatrix;
500 Loop *OuterMostLoop = *(LoopList.begin());
501 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
502 OuterMostLoop, DI)) {
503 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Populating dependency matrix failed\n"
; } } while (false)
;
504 return false;
505 }
506#ifdef DUMP_DEP_MATRICIES
507 LLVM_DEBUG(dbgs() << "Dependence before interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence before interchange\n"
; } } while (false)
;
508 printDepMatrix(DependencyMatrix);
509#endif
510
511 // Get the Outermost loop exit.
512 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
513 if (!LoopNestExit) {
514 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "OuterMostLoop needs an unique exit block"
; } } while (false)
;
515 return false;
516 }
517
518 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
519 // Move the selected loop outwards to the best possible position.
520 Loop *LoopToBeInterchanged = LoopList[SelecLoopId];
521 for (unsigned i = SelecLoopId; i > 0; i--) {
522 bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i,
523 i - 1, DependencyMatrix);
524 if (!Interchanged)
525 return Changed;
526 // Update the DependencyMatrix
527 interChangeDependencies(DependencyMatrix, i, i - 1);
528#ifdef DUMP_DEP_MATRICIES
529 LLVM_DEBUG(dbgs() << "Dependence after interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence after interchange\n"
; } } while (false)
;
530 printDepMatrix(DependencyMatrix);
531#endif
532 Changed |= Interchanged;
533 }
534 return Changed;
535 }
536
537 bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
538 unsigned OuterLoopId,
539 std::vector<std::vector<char>> &DependencyMatrix) {
540 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
541 << " and OuterLoopId = " << OuterLoopId << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
;
542 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
543 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
544 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not interchanging loops. Cannot prove legality.\n"
; } } while (false)
;
545 return false;
546 }
547 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are legal to interchange\n"
; } } while (false)
;
548 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
549 if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
550 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Interchanging loops not profitable.\n"
; } } while (false)
;
551 return false;
552 }
553
554 ORE->emit([&]() {
555 return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged",
556 InnerLoop->getStartLoc(),
557 InnerLoop->getHeader())
558 << "Loop interchanged with enclosing loop.";
559 });
560
561 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
562 LIT.transform();
563 LLVM_DEBUG(dbgs() << "Loops interchanged.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops interchanged.\n"
; } } while (false)
;
564 LoopsInterchanged++;
565
566 assert(InnerLoop->isLCSSAForm(*DT) &&(static_cast <bool> (InnerLoop->isLCSSAForm(*DT) &&
"Inner loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("InnerLoop->isLCSSAForm(*DT) && \"Inner loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 567, __extension__
__PRETTY_FUNCTION__))
567 "Inner loop not left in LCSSA form after loop interchange!")(static_cast <bool> (InnerLoop->isLCSSAForm(*DT) &&
"Inner loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("InnerLoop->isLCSSAForm(*DT) && \"Inner loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 567, __extension__
__PRETTY_FUNCTION__))
;
568 assert(OuterLoop->isLCSSAForm(*DT) &&(static_cast <bool> (OuterLoop->isLCSSAForm(*DT) &&
"Outer loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("OuterLoop->isLCSSAForm(*DT) && \"Outer loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 569, __extension__
__PRETTY_FUNCTION__))
569 "Outer loop not left in LCSSA form after loop interchange!")(static_cast <bool> (OuterLoop->isLCSSAForm(*DT) &&
"Outer loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("OuterLoop->isLCSSAForm(*DT) && \"Outer loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 569, __extension__
__PRETTY_FUNCTION__))
;
570
571 return true;
572 }
573};
574
575} // end anonymous namespace
576
577bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
578 return any_of(*BB, [](const Instruction &I) {
579 return I.mayHaveSideEffects() || I.mayReadFromMemory();
580 });
581}
582
583bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
584 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
585 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
586 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
587
588 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking if loops are tightly nested\n"
; } } while (false)
;
589
590 // A perfectly nested loop will not have any branch in between the outer and
591 // inner block i.e. outer header will branch to either inner preheader and
592 // outerloop latch.
593 BranchInst *OuterLoopHeaderBI =
594 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
595 if (!OuterLoopHeaderBI)
596 return false;
597
598 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
599 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
600 Succ != OuterLoopLatch)
601 return false;
602
603 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking instructions in Loop header and Loop latch\n"
; } } while (false)
;
604 // We do not have any basic block in between now make sure the outer header
605 // and outer loop latch doesn't contain any unsafe instructions.
606 if (containsUnsafeInstructions(OuterLoopHeader) ||
607 containsUnsafeInstructions(OuterLoopLatch))
608 return false;
609
610 // Also make sure the inner loop preheader does not contain any unsafe
611 // instructions. Note that all instructions in the preheader will be moved to
612 // the outer loop header when interchanging.
613 if (InnerLoopPreHeader != OuterLoopHeader &&
614 containsUnsafeInstructions(InnerLoopPreHeader))
615 return false;
616
617 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
618 // Ensure the inner loop exit block flows to the outer loop latch possibly
619 // through empty blocks.
620 const BasicBlock &SuccInner =
621 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
622 if (&SuccInner != OuterLoopLatch) {
623 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExitdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
624 << " does not lead to the outer loop latch.\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
;
625 return false;
626 }
627 // The inner loop exit block does flow to the outer loop latch and not some
628 // other BBs, now make sure it contains safe instructions, since it will be
629 // moved into the (new) inner loop after interchange.
630 if (containsUnsafeInstructions(InnerLoopExit))
631 return false;
632
633 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are perfectly nested\n"
; } } while (false)
;
634 // We have a perfect loop nest.
635 return true;
636}
637
638bool LoopInterchangeLegality::isLoopStructureUnderstood(
639 PHINode *InnerInduction) {
640 unsigned Num = InnerInduction->getNumOperands();
641 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
642 for (unsigned i = 0; i < Num; ++i) {
13
Assuming 'i' is >= 'Num'
14
Loop condition is false. Execution continues on line 666
643 Value *Val = InnerInduction->getOperand(i);
644 if (isa<Constant>(Val))
645 continue;
646 Instruction *I = dyn_cast<Instruction>(Val);
647 if (!I)
648 return false;
649 // TODO: Handle triangular loops.
650 // e.g. for(int i=0;i<N;i++)
651 // for(int j=i;j<N;j++)
652 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
653 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
654 InnerLoopPreheader &&
655 !OuterLoop->isLoopInvariant(I)) {
656 return false;
657 }
658 }
659
660 // TODO: Handle triangular loops of another form.
661 // e.g. for(int i=0;i<N;i++)
662 // for(int j=0;j<i;j++)
663 // or,
664 // for(int i=0;i<N;i++)
665 // for(int j=0;j*i<N;j++)
666 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
667 BranchInst *InnerLoopLatchBI =
16
'InnerLoopLatchBI' initialized to a null pointer value
668 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
15
Assuming the object is not a 'BranchInst'
669 if (!InnerLoopLatchBI->isConditional())
17
Called C++ object pointer is null
670 return false;
671 if (CmpInst *InnerLoopCmp =
672 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
673 Value *Op0 = InnerLoopCmp->getOperand(0);
674 Value *Op1 = InnerLoopCmp->getOperand(1);
675
676 // LHS and RHS of the inner loop exit condition, e.g.,
677 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
678 Value *Left = nullptr;
679 Value *Right = nullptr;
680
681 // Check if V only involves inner loop induction variable.
682 // Return true if V is InnerInduction, or a cast from
683 // InnerInduction, or a binary operator that involves
684 // InnerInduction and a constant.
685 std::function<bool(Value *)> IsPathToIndVar;
686 IsPathToIndVar = [&InnerInduction, &IsPathToIndVar](Value *V) -> bool {
687 if (V == InnerInduction)
688 return true;
689 if (isa<Constant>(V))
690 return true;
691 Instruction *I = dyn_cast<Instruction>(V);
692 if (!I)
693 return false;
694 if (isa<CastInst>(I))
695 return IsPathToIndVar(I->getOperand(0));
696 if (isa<BinaryOperator>(I))
697 return IsPathToIndVar(I->getOperand(0)) &&
698 IsPathToIndVar(I->getOperand(1));
699 return false;
700 };
701
702 if (IsPathToIndVar(Op0) && !isa<Constant>(Op0)) {
703 Left = Op0;
704 Right = Op1;
705 } else if (IsPathToIndVar(Op1) && !isa<Constant>(Op1)) {
706 Left = Op1;
707 Right = Op0;
708 }
709
710 if (Left == nullptr)
711 return false;
712
713 const SCEV *S = SE->getSCEV(Right);
714 if (!SE->isLoopInvariant(S, OuterLoop))
715 return false;
716 }
717
718 return true;
719}
720
721// If SV is a LCSSA PHI node with a single incoming value, return the incoming
722// value.
723static Value *followLCSSA(Value *SV) {
724 PHINode *PHI = dyn_cast<PHINode>(SV);
725 if (!PHI)
726 return SV;
727
728 if (PHI->getNumIncomingValues() != 1)
729 return SV;
730 return followLCSSA(PHI->getIncomingValue(0));
731}
732
733// Check V's users to see if it is involved in a reduction in L.
734static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
735 // Reduction variables cannot be constants.
736 if (isa<Constant>(V))
737 return nullptr;
738
739 for (Value *User : V->users()) {
740 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
741 if (PHI->getNumIncomingValues() == 1)
742 continue;
743 RecurrenceDescriptor RD;
744 if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
745 return PHI;
746 return nullptr;
747 }
748 }
749
750 return nullptr;
751}
752
753bool LoopInterchangeLegality::findInductionAndReductions(
754 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
755 if (!L->getLoopLatch() || !L->getLoopPredecessor())
756 return false;
757 for (PHINode &PHI : L->getHeader()->phis()) {
758 RecurrenceDescriptor RD;
759 InductionDescriptor ID;
760 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
761 Inductions.push_back(&PHI);
762 else {
763 // PHIs in inner loops need to be part of a reduction in the outer loop,
764 // discovered when checking the PHIs of the outer loop earlier.
765 if (!InnerLoop) {
766 if (!OuterInnerReductions.count(&PHI)) {
767 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
768 "across the outer loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
;
769 return false;
770 }
771 } else {
772 assert(PHI.getNumIncomingValues() == 2 &&(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 773, __extension__
__PRETTY_FUNCTION__))
773 "Phis in loop header should have exactly 2 incoming values")(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 773, __extension__
__PRETTY_FUNCTION__))
;
774 // Check if we have a PHI node in the outer loop that has a reduction
775 // result from the inner loop as an incoming value.
776 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
777 PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
778 if (!InnerRedPhi ||
779 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
780 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
781 dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
782 << "Failed to recognize PHI as an induction or reduction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
;
783 return false;
784 }
785 OuterInnerReductions.insert(&PHI);
786 OuterInnerReductions.insert(InnerRedPhi);
787 }
788 }
789 }
790 return true;
791}
792
793// This function indicates the current limitations in the transform as a result
794// of which we do not proceed.
795bool LoopInterchangeLegality::currentLimitations() {
796 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
797
798 // transform currently expects the loop latches to also be the exiting
799 // blocks.
800 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
1
Assuming the condition is false
5
Taking false branch
801 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
2
Assuming the condition is false
802 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
3
Assuming the object is a 'BranchInst'
803 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
4
Assuming the object is a 'BranchInst'
804 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
805 dbgs() << "Loops where the latch is not the exiting block are not"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
806 << " supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
;
807 ORE->emit([&]() {
808 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ExitingNotLatch",
809 OuterLoop->getStartLoc(),
810 OuterLoop->getHeader())
811 << "Loops where the latch is not the exiting block cannot be"
812 " interchange currently.";
813 });
814 return true;
815 }
816
817 PHINode *InnerInductionVar;
818 SmallVector<PHINode *, 8> Inductions;
819 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
6
Assuming the condition is false
7
Taking false branch
820 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
821 dbgs() << "Only outer loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
822 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
823 ORE->emit([&]() {
824 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter",
825 OuterLoop->getStartLoc(),
826 OuterLoop->getHeader())
827 << "Only outer loops with induction or reduction PHI nodes can be"
828 " interchanged currently.";
829 });
830 return true;
831 }
832
833 Inductions.clear();
834 if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
8
Assuming the condition is false
9
Taking false branch
835 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
836 dbgs() << "Only inner loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
837 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
838 ORE->emit([&]() {
839 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner",
840 InnerLoop->getStartLoc(),
841 InnerLoop->getHeader())
842 << "Only inner loops with induction or reduction PHI nodes can be"
843 " interchange currently.";
844 });
845 return true;
846 }
847
848 // TODO: Currently we handle only loops with 1 induction variable.
849 if (Inductions.size() != 1) {
10
Assuming the condition is false
11
Taking false branch
850 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "We currently only support loops with 1 induction variable."
<< "Failed to interchange due to current limitation\n"
; } } while (false)
851 dbgs() << "We currently only support loops with 1 induction variable."do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "We currently only support loops with 1 induction variable."
<< "Failed to interchange due to current limitation\n"
; } } while (false)
852 << "Failed to interchange due to current limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "We currently only support loops with 1 induction variable."
<< "Failed to interchange due to current limitation\n"
; } } while (false)
;
853 ORE->emit([&]() {
854 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "MultiInductionInner",
855 InnerLoop->getStartLoc(),
856 InnerLoop->getHeader())
857 << "Only inner loops with 1 induction variable can be "
858 "interchanged currently.";
859 });
860 return true;
861 }
862 InnerInductionVar = Inductions.pop_back_val();
863
864 // TODO: Triangular loops are not handled for now.
865 if (!isLoopStructureUnderstood(InnerInductionVar)) {
12
Calling 'LoopInterchangeLegality::isLoopStructureUnderstood'
866 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop structure not understood by pass\n"
; } } while (false)
;
867 ORE->emit([&]() {
868 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner",
869 InnerLoop->getStartLoc(),
870 InnerLoop->getHeader())
871 << "Inner loop structure not understood currently.";
872 });
873 return true;
874 }
875
876 return false;
877}
878
879// We currently only support LCSSA PHI nodes in the inner loop exit, if their
880// users are either reduction PHIs or PHIs outside the outer loop (which means
881// the we are only interested in the final value after the loop).
882static bool
883areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
884 SmallPtrSetImpl<PHINode *> &Reductions) {
885 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
886 for (PHINode &PHI : InnerExit->phis()) {
887 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
888 if (PHI.getNumIncomingValues() > 1)
889 return false;
890 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
891 PHINode *PN = dyn_cast<PHINode>(U);
892 return !PN ||
893 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
894 })) {
895 return false;
896 }
897 }
898 return true;
899}
900
901// We currently support LCSSA PHI nodes in the outer loop exit, if their
902// incoming values do not come from the outer loop latch or if the
903// outer loop latch has a single predecessor. In that case, the value will
904// be available if both the inner and outer loop conditions are true, which
905// will still be true after interchanging. If we have multiple predecessor,
906// that may not be the case, e.g. because the outer loop latch may be executed
907// if the inner loop is not executed.
908static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
909 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
910 for (PHINode &PHI : LoopNestExit->phis()) {
911 // FIXME: We currently are not able to detect floating point reductions
912 // and have to use floating point PHIs as a proxy to prevent
913 // interchanging in the presence of floating point reductions.
914 if (PHI.getType()->isFloatingPointTy())
915 return false;
916 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
917 Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
918 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
919 continue;
920
921 // The incoming value is defined in the outer loop latch. Currently we
922 // only support that in case the outer loop latch has a single predecessor.
923 // This guarantees that the outer loop latch is executed if and only if
924 // the inner loop is executed (because tightlyNested() guarantees that the
925 // outer loop header only branches to the inner loop or the outer loop
926 // latch).
927 // FIXME: We could weaken this logic and allow multiple predecessors,
928 // if the values are produced outside the loop latch. We would need
929 // additional logic to update the PHI nodes in the exit block as
930 // well.
931 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
932 return false;
933 }
934 }
935 return true;
936}
937
938// In case of multi-level nested loops, it may occur that lcssa phis exist in
939// the latch of InnerLoop, i.e., when defs of the incoming values are further
940// inside the loopnest. Sometimes those incoming values are not available
941// after interchange, since the original inner latch will become the new outer
942// latch which may have predecessor paths that do not include those incoming
943// values.
944// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
945// multi-level loop nests.
946static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
947 if (InnerLoop->getSubLoops().empty())
948 return true;
949 // If the original outer latch has only one predecessor, then values defined
950 // further inside the looploop, e.g., in the innermost loop, will be available
951 // at the new outer latch after interchange.
952 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
953 return true;
954
955 // The outer latch has more than one predecessors, i.e., the inner
956 // exit and the inner header.
957 // PHI nodes in the inner latch are lcssa phis where the incoming values
958 // are defined further inside the loopnest. Check if those phis are used
959 // in the original inner latch. If that is the case then bail out since
960 // those incoming values may not be available at the new outer latch.
961 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
962 for (PHINode &PHI : InnerLoopLatch->phis()) {
963 for (auto *U : PHI.users()) {
964 Instruction *UI = cast<Instruction>(U);
965 if (InnerLoopLatch == UI->getParent())
966 return false;
967 }
968 }
969 return true;
970}
971
972bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
973 unsigned OuterLoopId,
974 CharMatrix &DepMatrix) {
975 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
976 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
977 << " and OuterLoopId = " << OuterLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
978 << " due to dependence\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
;
979 ORE->emit([&]() {
980 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence",
981 InnerLoop->getStartLoc(),
982 InnerLoop->getHeader())
983 << "Cannot interchange loops due to dependences.";
984 });
985 return false;
986 }
987 // Check if outer and inner loop contain legal instructions only.
988 for (auto *BB : OuterLoop->blocks())
989 for (Instruction &I : BB->instructionsWithoutDebug())
990 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
991 // readnone functions do not prevent interchanging.
992 if (CI->onlyWritesMemory())
993 continue;
994 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
995 dbgs() << "Loops with call instructions cannot be interchanged "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
996 << "safely.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
;
997 ORE->emit([&]() {
998 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "CallInst",
999 CI->getDebugLoc(),
1000 CI->getParent())
1001 << "Cannot interchange loops due to call instruction.";
1002 });
1003
1004 return false;
1005 }
1006
1007 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
1008 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"
; } } while (false)
;
1009 ORE->emit([&]() {
1010 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedInnerLatchPHI",
1011 InnerLoop->getStartLoc(),
1012 InnerLoop->getHeader())
1013 << "Cannot interchange loops because unsupported PHI nodes found "
1014 "in inner loop latch.";
1015 });
1016 return false;
1017 }
1018
1019 // TODO: The loops could not be interchanged due to current limitations in the
1020 // transform module.
1021 if (currentLimitations()) {
1022 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not legal because of current transform limitation\n"
; } } while (false)
;
1023 return false;
1024 }
1025
1026 // Check if the loops are tightly nested.
1027 if (!tightlyNested(OuterLoop, InnerLoop)) {
1028 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops not tightly nested\n"
; } } while (false)
;
1029 ORE->emit([&]() {
1030 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested",
1031 InnerLoop->getStartLoc(),
1032 InnerLoop->getHeader())
1033 << "Cannot interchange loops because they are not tightly "
1034 "nested.";
1035 });
1036 return false;
1037 }
1038
1039 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1040 OuterInnerReductions)) {
1041 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"
; } } while (false)
;
1042 ORE->emit([&]() {
1043 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1044 InnerLoop->getStartLoc(),
1045 InnerLoop->getHeader())
1046 << "Found unsupported PHI node in loop exit.";
1047 });
1048 return false;
1049 }
1050
1051 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1052 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"
; } } while (false)
;
1053 ORE->emit([&]() {
1054 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1055 OuterLoop->getStartLoc(),
1056 OuterLoop->getHeader())
1057 << "Found unsupported PHI node in loop exit.";
1058 });
1059 return false;
1060 }
1061
1062 return true;
1063}
1064
1065int LoopInterchangeProfitability::getInstrOrderCost() {
1066 unsigned GoodOrder, BadOrder;
1067 BadOrder = GoodOrder = 0;
1068 for (BasicBlock *BB : InnerLoop->blocks()) {
1069 for (Instruction &Ins : *BB) {
1070 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1071 unsigned NumOp = GEP->getNumOperands();
1072 bool FoundInnerInduction = false;
1073 bool FoundOuterInduction = false;
1074 for (unsigned i = 0; i < NumOp; ++i) {
1075 // Skip operands that are not SCEV-able.
1076 if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1077 continue;
1078
1079 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1080 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1081 if (!AR)
1082 continue;
1083
1084 // If we find the inner induction after an outer induction e.g.
1085 // for(int i=0;i<N;i++)
1086 // for(int j=0;j<N;j++)
1087 // A[i][j] = A[i-1][j-1]+k;
1088 // then it is a good order.
1089 if (AR->getLoop() == InnerLoop) {
1090 // We found an InnerLoop induction after OuterLoop induction. It is
1091 // a good order.
1092 FoundInnerInduction = true;
1093 if (FoundOuterInduction) {
1094 GoodOrder++;
1095 break;
1096 }
1097 }
1098 // If we find the outer induction after an inner induction e.g.
1099 // for(int i=0;i<N;i++)
1100 // for(int j=0;j<N;j++)
1101 // A[j][i] = A[j-1][i-1]+k;
1102 // then it is a bad order.
1103 if (AR->getLoop() == OuterLoop) {
1104 // We found an OuterLoop induction after InnerLoop induction. It is
1105 // a bad order.
1106 FoundOuterInduction = true;
1107 if (FoundInnerInduction) {
1108 BadOrder++;
1109 break;
1110 }
1111 }
1112 }
1113 }
1114 }
1115 }
1116 return GoodOrder - BadOrder;
1117}
1118
1119static bool isProfitableForVectorization(unsigned InnerLoopId,
1120 unsigned OuterLoopId,
1121 CharMatrix &DepMatrix) {
1122 // TODO: Improve this heuristic to catch more cases.
1123 // If the inner loop is loop independent or doesn't carry any dependency it is
1124 // profitable to move this to outer position.
1125 for (auto &Row : DepMatrix) {
1126 if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1127 return false;
1128 // TODO: We need to improve this heuristic.
1129 if (Row[OuterLoopId] != '=')
1130 return false;
1131 }
1132 // If outer loop has dependence and inner loop is loop independent then it is
1133 // profitable to interchange to enable parallelism.
1134 // If there are no dependences, interchanging will not improve anything.
1135 return !DepMatrix.empty();
1136}
1137
1138bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1139 unsigned OuterLoopId,
1140 CharMatrix &DepMatrix) {
1141 // TODO: Add better profitability checks.
1142 // e.g
1143 // 1) Construct dependency matrix and move the one with no loop carried dep
1144 // inside to enable vectorization.
1145
1146 // This is rough cost estimation algorithm. It counts the good and bad order
1147 // of induction variables in the instruction and allows reordering if number
1148 // of bad orders is more than good.
1149 int Cost = getInstrOrderCost();
1150 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cost = " << Cost
<< "\n"; } } while (false)
;
1151 if (Cost < -LoopInterchangeCostThreshold)
1152 return true;
1153
1154 // It is not profitable as per current cache profitability model. But check if
1155 // we can move this loop outside to improve parallelism.
1156 if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1157 return true;
1158
1159 ORE->emit([&]() {
1160 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable",
1161 InnerLoop->getStartLoc(),
1162 InnerLoop->getHeader())
1163 << "Interchanging loops is too costly (cost="
1164 << ore::NV("Cost", Cost) << ", threshold="
1165 << ore::NV("Threshold", LoopInterchangeCostThreshold)
1166 << ") and it does not improve parallelism.";
1167 });
1168 return false;
1169}
1170
1171void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1172 Loop *InnerLoop) {
1173 for (Loop *L : *OuterLoop)
1174 if (L == InnerLoop) {
1175 OuterLoop->removeChildLoop(L);
1176 return;
1177 }
1178 llvm_unreachable("Couldn't find loop")::llvm::llvm_unreachable_internal("Couldn't find loop", "llvm/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1178)
;
1179}
1180
1181/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1182/// new inner and outer loop after interchanging: NewInner is the original
1183/// outer loop and NewOuter is the original inner loop.
1184///
1185/// Before interchanging, we have the following structure
1186/// Outer preheader
1187// Outer header
1188// Inner preheader
1189// Inner header
1190// Inner body
1191// Inner latch
1192// outer bbs
1193// Outer latch
1194//
1195// After interchanging:
1196// Inner preheader
1197// Inner header
1198// Outer preheader
1199// Outer header
1200// Inner body
1201// outer bbs
1202// Outer latch
1203// Inner latch
1204void LoopInterchangeTransform::restructureLoops(
1205 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1206 BasicBlock *OrigOuterPreHeader) {
1207 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1208 // The original inner loop preheader moves from the new inner loop to
1209 // the parent loop, if there is one.
1210 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1211 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1212
1213 // Switch the loop levels.
1214 if (OuterLoopParent) {
1215 // Remove the loop from its parent loop.
1216 removeChildLoop(OuterLoopParent, NewInner);
1217 removeChildLoop(NewInner, NewOuter);
1218 OuterLoopParent->addChildLoop(NewOuter);
1219 } else {
1220 removeChildLoop(NewInner, NewOuter);
1221 LI->changeTopLevelLoop(NewInner, NewOuter);
1222 }
1223 while (!NewOuter->isInnermost())
1224 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1225 NewOuter->addChildLoop(NewInner);
1226
1227 // BBs from the original inner loop.
1228 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1229
1230 // Add BBs from the original outer loop to the original inner loop (excluding
1231 // BBs already in inner loop)
1232 for (BasicBlock *BB : NewInner->blocks())
1233 if (LI->getLoopFor(BB) == NewInner)
1234 NewOuter->addBlockEntry(BB);
1235
1236 // Now remove inner loop header and latch from the new inner loop and move
1237 // other BBs (the loop body) to the new inner loop.
1238 BasicBlock *OuterHeader = NewOuter->getHeader();
1239 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1240 for (BasicBlock *BB : OrigInnerBBs) {
1241 // Nothing will change for BBs in child loops.
1242 if (LI->getLoopFor(BB) != NewOuter)
1243 continue;
1244 // Remove the new outer loop header and latch from the new inner loop.
1245 if (BB == OuterHeader || BB == OuterLatch)
1246 NewInner->removeBlockFromLoop(BB);
1247 else
1248 LI->changeLoopFor(BB, NewInner);
1249 }
1250
1251 // The preheader of the original outer loop becomes part of the new
1252 // outer loop.
1253 NewOuter->addBlockEntry(OrigOuterPreHeader);
1254 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1255
1256 // Tell SE that we move the loops around.
1257 SE->forgetLoop(NewOuter);
1258 SE->forgetLoop(NewInner);
1259}
1260
1261bool LoopInterchangeTransform::transform() {
1262 bool Transformed = false;
1263 Instruction *InnerIndexVar;
1264
1265 if (InnerLoop->getSubLoops().empty()) {
1266 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1267 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Splitting the inner loop latch\n"
; } } while (false)
;
1268 PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
1269 if (!InductionPHI) {
1270 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to find the point to split loop latch \n"
; } } while (false)
;
1271 return false;
1272 }
1273
1274 if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1275 InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
1276 else
1277 InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
1278
1279 // Ensure that InductionPHI is the first Phi node.
1280 if (&InductionPHI->getParent()->front() != InductionPHI)
1281 InductionPHI->moveBefore(&InductionPHI->getParent()->front());
1282
1283 // Create a new latch block for the inner loop. We split at the
1284 // current latch's terminator and then move the condition and all
1285 // operands that are not either loop-invariant or the induction PHI into the
1286 // new latch block.
1287 BasicBlock *NewLatch =
1288 SplitBlock(InnerLoop->getLoopLatch(),
1289 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1290
1291 SmallSetVector<Instruction *, 4> WorkList;
1292 unsigned i = 0;
1293 auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() {
1294 for (; i < WorkList.size(); i++) {
1295 // Duplicate instruction and move it the new latch. Update uses that
1296 // have been moved.
1297 Instruction *NewI = WorkList[i]->clone();
1298 NewI->insertBefore(NewLatch->getFirstNonPHI());
1299 assert(!NewI->mayHaveSideEffects() &&(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1301, __extension__
__PRETTY_FUNCTION__))
1300 "Moving instructions with side-effects may change behavior of "(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1301, __extension__
__PRETTY_FUNCTION__))
1301 "the loop nest!")(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1301, __extension__
__PRETTY_FUNCTION__))
;
1302 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1303 Instruction *UserI = cast<Instruction>(U.getUser());
1304 if (!InnerLoop->contains(UserI->getParent()) ||
1305 UserI->getParent() == NewLatch || UserI == InductionPHI)
1306 U.set(NewI);
1307 }
1308 // Add operands of moved instruction to the worklist, except if they are
1309 // outside the inner loop or are the induction PHI.
1310 for (Value *Op : WorkList[i]->operands()) {
1311 Instruction *OpI = dyn_cast<Instruction>(Op);
1312 if (!OpI ||
1313 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1314 OpI == InductionPHI)
1315 continue;
1316 WorkList.insert(OpI);
1317 }
1318 }
1319 };
1320
1321 // FIXME: Should we interchange when we have a constant condition?
1322 Instruction *CondI = dyn_cast<Instruction>(
1323 cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1324 ->getCondition());
1325 if (CondI)
1326 WorkList.insert(CondI);
1327 MoveInstructions();
1328 WorkList.insert(cast<Instruction>(InnerIndexVar));
1329 MoveInstructions();
1330
1331 // Splits the inner loops phi nodes out into a separate basic block.
1332 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1333 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1334 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "splitting InnerLoopHeader done\n"
; } } while (false)
;
1335 }
1336
1337 // Instructions in the original inner loop preheader may depend on values
1338 // defined in the outer loop header. Move them there, because the original
1339 // inner loop preheader will become the entry into the interchanged loop nest.
1340 // Currently we move all instructions and rely on LICM to move invariant
1341 // instructions outside the loop nest.
1342 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1343 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1344 if (InnerLoopPreHeader != OuterLoopHeader) {
1345 SmallPtrSet<Instruction *, 4> NeedsMoving;
1346 for (Instruction &I :
1347 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1348 std::prev(InnerLoopPreHeader->end()))))
1349 I.moveBefore(OuterLoopHeader->getTerminator());
1350 }
1351
1352 Transformed |= adjustLoopLinks();
1353 if (!Transformed) {
1354 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopLinks failed\n"
; } } while (false)
;
1355 return false;
1356 }
1357
1358 return true;
1359}
1360
1361/// \brief Move all instructions except the terminator from FromBB right before
1362/// InsertBefore
1363static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1364 auto &ToList = InsertBefore->getParent()->getInstList();
1365 auto &FromList = FromBB->getInstList();
1366
1367 ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1368 FromBB->getTerminator()->getIterator());
1369}
1370
1371/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1372static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1373 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1374 // from BB1 afterwards.
1375 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1376 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1377 for (Instruction *I : TempInstrs)
1378 I->removeFromParent();
1379
1380 // Move instructions from BB2 to BB1.
1381 moveBBContents(BB2, BB1->getTerminator());
1382
1383 // Move instructions from TempInstrs to BB2.
1384 for (Instruction *I : TempInstrs)
1385 I->insertBefore(BB2->getTerminator());
1386}
1387
1388// Update BI to jump to NewBB instead of OldBB. Records updates to the
1389// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1390// \p OldBB is exactly once in BI's successor list.
1391static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1392 BasicBlock *NewBB,
1393 std::vector<DominatorTree::UpdateType> &DTUpdates,
1394 bool MustUpdateOnce = true) {
1395 assert((!MustUpdateOnce ||(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1399, __extension__
__PRETTY_FUNCTION__))
1396 llvm::count_if(successors(BI),(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1399, __extension__
__PRETTY_FUNCTION__))
1397 [OldBB](BasicBlock *BB) {(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1399, __extension__
__PRETTY_FUNCTION__))
1398 return BB == OldBB;(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1399, __extension__
__PRETTY_FUNCTION__))
1399 }) == 1) && "BI must jump to OldBB exactly once.")(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1399, __extension__
__PRETTY_FUNCTION__))
;
1400 bool Changed = false;
1401 for (Use &Op : BI->operands())
1402 if (Op == OldBB) {
1403 Op.set(NewBB);
1404 Changed = true;
1405 }
1406
1407 if (Changed) {
1408 DTUpdates.push_back(
1409 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1410 DTUpdates.push_back(
1411 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1412 }
1413 assert(Changed && "Expected a successor to be updated")(static_cast <bool> (Changed && "Expected a successor to be updated"
) ? void (0) : __assert_fail ("Changed && \"Expected a successor to be updated\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1413, __extension__
__PRETTY_FUNCTION__))
;
1414}
1415
1416// Move Lcssa PHIs to the right place.
1417static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1418 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1419 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1420 Loop *InnerLoop, LoopInfo *LI) {
1421
1422 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1423 // defined either in the header or latch. Those blocks will become header and
1424 // latch of the new outer loop, and the only possible users can PHI nodes
1425 // in the exit block of the loop nest or the outer loop header (reduction
1426 // PHIs, in that case, the incoming value must be defined in the inner loop
1427 // header). We can just substitute the user with the incoming value and remove
1428 // the PHI.
1429 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1430 assert(P.getNumIncomingValues() == 1 &&(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1431, __extension__
__PRETTY_FUNCTION__))
1431 "Only loops with a single exit are supported!")(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1431, __extension__
__PRETTY_FUNCTION__))
;
1432
1433 // Incoming values are guaranteed be instructions currently.
1434 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1435 // Skip phis with incoming values from the inner loop body, excluding the
1436 // header and latch.
1437 if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
1438 continue;
1439
1440 assert(all_of(P.users(),(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1448, __extension__
__PRETTY_FUNCTION__))
1441 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1448, __extension__
__PRETTY_FUNCTION__))
1442 return (cast<PHINode>(U)->getParent() == OuterHeader &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1448, __extension__
__PRETTY_FUNCTION__))
1443 IncI->getParent() == InnerHeader) ||(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1448, __extension__
__PRETTY_FUNCTION__))
1444 cast<PHINode>(U)->getParent() == OuterExit;(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1448, __extension__
__PRETTY_FUNCTION__))
1445 }) &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1448, __extension__
__PRETTY_FUNCTION__))
1446 "Can only replace phis iff the uses are in the loop nest exit or "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1448, __extension__
__PRETTY_FUNCTION__))
1447 "the incoming value is defined in the inner header (it will "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1448, __extension__
__PRETTY_FUNCTION__))
1448 "dominate all loop blocks after interchanging)")(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1448, __extension__
__PRETTY_FUNCTION__))
;
1449 P.replaceAllUsesWith(IncI);
1450 P.eraseFromParent();
1451 }
1452
1453 SmallVector<PHINode *, 8> LcssaInnerExit;
1454 for (PHINode &P : InnerExit->phis())
1455 LcssaInnerExit.push_back(&P);
1456
1457 SmallVector<PHINode *, 8> LcssaInnerLatch;
1458 for (PHINode &P : InnerLatch->phis())
1459 LcssaInnerLatch.push_back(&P);
1460
1461 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1462 // If a PHI node has users outside of InnerExit, it has a use outside the
1463 // interchanged loop and we have to preserve it. We move these to
1464 // InnerLatch, which will become the new exit block for the innermost
1465 // loop after interchanging.
1466 for (PHINode *P : LcssaInnerExit)
1467 P->moveBefore(InnerLatch->getFirstNonPHI());
1468
1469 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1470 // and we have to move them to the new inner latch.
1471 for (PHINode *P : LcssaInnerLatch)
1472 P->moveBefore(InnerExit->getFirstNonPHI());
1473
1474 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1475 // incoming values defined in the outer loop, we have to add a new PHI
1476 // in the inner loop latch, which became the exit block of the outer loop,
1477 // after interchanging.
1478 if (OuterExit) {
1479 for (PHINode &P : OuterExit->phis()) {
1480 if (P.getNumIncomingValues() != 1)
1481 continue;
1482 // Skip Phis with incoming values defined in the inner loop. Those should
1483 // already have been updated.
1484 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1485 if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1486 continue;
1487
1488 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1489 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1490 NewPhi->setIncomingBlock(0, OuterLatch);
1491 // We might have incoming edges from other BBs, i.e., the original outer
1492 // header.
1493 for (auto *Pred : predecessors(InnerLatch)) {
1494 if (Pred == OuterLatch)
1495 continue;
1496 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1497 }
1498 NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1499 P.setIncomingValue(0, NewPhi);
1500 }
1501 }
1502
1503 // Now adjust the incoming blocks for the LCSSA PHIs.
1504 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1505 // with the new latch.
1506 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1507}
1508
1509bool LoopInterchangeTransform::adjustLoopBranches() {
1510 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopBranches called\n"
; } } while (false)
;
1511 std::vector<DominatorTree::UpdateType> DTUpdates;
1512
1513 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1514 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1515
1516 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1518, __extension__
__PRETTY_FUNCTION__))
1517 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1518, __extension__
__PRETTY_FUNCTION__))
1518 InnerLoopPreHeader && "Guaranteed by loop-simplify form")(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1518, __extension__
__PRETTY_FUNCTION__))
;
1519 // Ensure that both preheaders do not contain PHI nodes and have single
1520 // predecessors. This allows us to move them easily. We use
1521 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1522 // preheaders do not satisfy those conditions.
1523 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1524 !OuterLoopPreHeader->getUniquePredecessor())
1525 OuterLoopPreHeader =
1526 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1527 if (InnerLoopPreHeader == OuterLoop->getHeader())
1528 InnerLoopPreHeader =
1529 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1530
1531 // Adjust the loop preheader
1532 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1533 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1534 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1535 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1536 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1537 BasicBlock *InnerLoopLatchPredecessor =
1538 InnerLoopLatch->getUniquePredecessor();
1539 BasicBlock *InnerLoopLatchSuccessor;
1540 BasicBlock *OuterLoopLatchSuccessor;
1541
1542 BranchInst *OuterLoopLatchBI =
1543 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1544 BranchInst *InnerLoopLatchBI =
1545 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1546 BranchInst *OuterLoopHeaderBI =
1547 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1548 BranchInst *InnerLoopHeaderBI =
1549 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1550
1551 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1552 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1553 !InnerLoopHeaderBI)
1554 return false;
1555
1556 BranchInst *InnerLoopLatchPredecessorBI =
1557 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1558 BranchInst *OuterLoopPredecessorBI =
1559 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1560
1561 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1562 return false;
1563 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1564 if (!InnerLoopHeaderSuccessor)
1565 return false;
1566
1567 // Adjust Loop Preheader and headers.
1568 // The branches in the outer loop predecessor and the outer loop header can
1569 // be unconditional branches or conditional branches with duplicates. Consider
1570 // this when updating the successors.
1571 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1572 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1573 // The outer loop header might or might not branch to the outer latch.
1574 // We are guaranteed to branch to the inner loop preheader.
1575 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1576 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1577 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1578 DTUpdates,
1579 /*MustUpdateOnce=*/false);
1580 }
1581 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1582 InnerLoopHeaderSuccessor, DTUpdates,
1583 /*MustUpdateOnce=*/false);
1584
1585 // Adjust reduction PHI's now that the incoming block has changed.
1586 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1587 OuterLoopHeader);
1588
1589 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1590 OuterLoopPreHeader, DTUpdates);
1591
1592 // -------------Adjust loop latches-----------
1593 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1594 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1595 else
1596 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1597
1598 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1599 InnerLoopLatchSuccessor, DTUpdates);
1600
1601 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1602 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1603 else
1604 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1605
1606 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1607 OuterLoopLatchSuccessor, DTUpdates);
1608 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1609 DTUpdates);
1610
1611 DT->applyUpdates(DTUpdates);
1612 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1613 OuterLoopPreHeader);
1614
1615 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1616 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1617 InnerLoop, LI);
1618 // For PHIs in the exit block of the outer loop, outer's latch has been
1619 // replaced by Inners'.
1620 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1621
1622 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1623 // Now update the reduction PHIs in the inner and outer loop headers.
1624 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1625 for (PHINode &PHI : InnerLoopHeader->phis())
1626 if (OuterInnerReductions.contains(&PHI))
1627 InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
1628 for (PHINode &PHI : OuterLoopHeader->phis())
1629 if (OuterInnerReductions.contains(&PHI))
1630 OuterLoopPHIs.push_back(&PHI);
1631
1632 // Now move the remaining reduction PHIs from outer to inner loop header and
1633 // vice versa. The PHI nodes must be part of a reduction across the inner and
1634 // outer loop and all the remains to do is and updating the incoming blocks.
1635 for (PHINode *PHI : OuterLoopPHIs) {
1636 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Outer loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1637 PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1638 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1638, __extension__
__PRETTY_FUNCTION__))
;
1639 }
1640 for (PHINode *PHI : InnerLoopPHIs) {
1641 PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1642 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1642, __extension__
__PRETTY_FUNCTION__))
;
1643 }
1644
1645 // Update the incoming blocks for moved PHI nodes.
1646 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1647 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1648 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1649 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1650
1651 // Values defined in the outer loop header could be used in the inner loop
1652 // latch. In that case, we need to create LCSSA phis for them, because after
1653 // interchanging they will be defined in the new inner loop and used in the
1654 // new outer loop.
1655 IRBuilder<> Builder(OuterLoopHeader->getContext());
1656 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1657 for (Instruction &I :
1658 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1659 MayNeedLCSSAPhis.push_back(&I);
1660 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1661
1662 return true;
1663}
1664
1665bool LoopInterchangeTransform::adjustLoopLinks() {
1666 // Adjust all branches in the inner and outer loop.
1667 bool Changed = adjustLoopBranches();
1668 if (Changed) {
1669 // We have interchanged the preheaders so we need to interchange the data in
1670 // the preheaders as well. This is because the content of the inner
1671 // preheader was previously executed inside the outer loop.
1672 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1673 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1674 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1675 }
1676 return Changed;
1677}
1678
1679namespace {
1680/// Main LoopInterchange Pass.
1681struct LoopInterchangeLegacyPass : public LoopPass {
1682 static char ID;
1683
1684 LoopInterchangeLegacyPass() : LoopPass(ID) {
1685 initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
1686 }
1687
1688 void getAnalysisUsage(AnalysisUsage &AU) const override {
1689 AU.addRequired<DependenceAnalysisWrapperPass>();
1690 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1691
1692 getLoopAnalysisUsage(AU);
1693 }
1694
1695 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1696 if (skipLoop(L))
1697 return false;
1698
1699 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1700 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1701 auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1702 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1703 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1704
1705 return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
1706 }
1707};
1708} // namespace
1709
1710char LoopInterchangeLegacyPass::ID = 0;
1711
1712INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1713 "Interchanges loops for cache reuse", false, false)static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1714INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
1715INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)initializeDependenceAnalysisWrapperPassPass(Registry);
1716INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)initializeOptimizationRemarkEmitterWrapperPassPass(Registry);
1717
1718INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1719 "Interchanges loops for cache reuse", false, false)PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1720
1721Pass *llvm::createLoopInterchangePass() {
1722 return new LoopInterchangeLegacyPass();
1723}
1724
1725PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1726 LoopAnalysisManager &AM,
1727 LoopStandardAnalysisResults &AR,
1728 LPMUpdater &U) {
1729 Function &F = *LN.getParent();
1730
1731 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1732 OptimizationRemarkEmitter ORE(&F);
1733 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
1734 return PreservedAnalyses::all();
1735 return getLoopPassPreservedAnalyses();
1736}