Bug Summary

File:build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Warning:line 1519, column 7
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LoopInterchange.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Transforms/Scalar -I include -I /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-09-04-125545-48738-1 -x c++ /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Scalar/LoopInterchange.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Analysis/DependenceAnalysis.h"
21#include "llvm/Analysis/LoopCacheAnalysis.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/LoopNestAnalysis.h"
24#include "llvm/Analysis/LoopPass.h"
25#include "llvm/Analysis/OptimizationRemarkEmitter.h"
26#include "llvm/Analysis/ScalarEvolution.h"
27#include "llvm/Analysis/ScalarEvolutionExpressions.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/DiagnosticInfo.h"
31#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/IRBuilder.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/User.h"
38#include "llvm/IR/Value.h"
39#include "llvm/InitializePasses.h"
40#include "llvm/Pass.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/raw_ostream.h"
46#include "llvm/Transforms/Scalar.h"
47#include "llvm/Transforms/Utils/BasicBlockUtils.h"
48#include "llvm/Transforms/Utils/LoopUtils.h"
49#include <cassert>
50#include <utility>
51#include <vector>
52
53using namespace llvm;
54
55#define DEBUG_TYPE"loop-interchange" "loop-interchange"
56
57STATISTIC(LoopsInterchanged, "Number of loops interchanged")static llvm::Statistic LoopsInterchanged = {"loop-interchange"
, "LoopsInterchanged", "Number of loops interchanged"}
;
58
59static cl::opt<int> LoopInterchangeCostThreshold(
60 "loop-interchange-threshold", cl::init(0), cl::Hidden,
61 cl::desc("Interchange if you gain more than this number"));
62
63namespace {
64
65using LoopVector = SmallVector<Loop *, 8>;
66
67// TODO: Check if we can use a sparse matrix here.
68using CharMatrix = std::vector<std::vector<char>>;
69
70} // end anonymous namespace
71
72// Maximum number of dependencies that can be handled in the dependency matrix.
73static const unsigned MaxMemInstrCount = 100;
74
75// Maximum loop depth supported.
76static const unsigned MaxLoopNestDepth = 10;
77
78#ifdef DUMP_DEP_MATRICIES
79static void printDepMatrix(CharMatrix &DepMatrix) {
80 for (auto &Row : DepMatrix) {
81 for (auto D : Row)
82 LLVM_DEBUG(dbgs() << D << " ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << D << " "; } } while
(false)
;
83 LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "\n"; } } while (false
)
;
84 }
85}
86#endif
87
88static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
89 Loop *L, DependenceInfo *DI,
90 ScalarEvolution *SE) {
91 using ValueVector = SmallVector<Value *, 16>;
92
93 ValueVector MemInstr;
94
95 // For each block.
96 for (BasicBlock *BB : L->blocks()) {
97 // Scan the BB and collect legal loads and stores.
98 for (Instruction &I : *BB) {
99 if (!isa<Instruction>(I))
100 return false;
101 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
102 if (!Ld->isSimple())
103 return false;
104 MemInstr.push_back(&I);
105 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
106 if (!St->isSimple())
107 return false;
108 MemInstr.push_back(&I);
109 }
110 }
111 }
112
113 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
114 << " Loads and Stores to analyze\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
;
115
116 ValueVector::iterator I, IE, J, JE;
117
118 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
119 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
120 std::vector<char> Dep;
121 Instruction *Src = cast<Instruction>(*I);
122 Instruction *Dst = cast<Instruction>(*J);
123 // Ignore Input dependencies.
124 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
125 continue;
126 // Track Output, Flow, and Anti dependencies.
127 if (auto D = DI->depends(Src, Dst, true)) {
128 assert(D->isOrdered() && "Expected an output, flow or anti dep.")(static_cast <bool> (D->isOrdered() && "Expected an output, flow or anti dep."
) ? void (0) : __assert_fail ("D->isOrdered() && \"Expected an output, flow or anti dep.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 128, __extension__
__PRETTY_FUNCTION__))
;
129 // If the direction vector is negative, normalize it to
130 // make it non-negative.
131 if (D->normalize(SE))
132 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Negative dependence vector normalized.\n"
; } } while (false)
;
133 LLVM_DEBUG(StringRef DepType =do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
134 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
135 dbgs() << "Found " << DepTypedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
136 << " dependency between Src and Dst\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
137 << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
;
138 unsigned Levels = D->getLevels();
139 char Direction;
140 for (unsigned II = 1; II <= Levels; ++II) {
141 if (D->isScalar(II)) {
142 Direction = 'S';
143 Dep.push_back(Direction);
144 } else {
145 unsigned Dir = D->getDirection(II);
146 if (Dir == Dependence::DVEntry::LT ||
147 Dir == Dependence::DVEntry::LE)
148 Direction = '<';
149 else if (Dir == Dependence::DVEntry::GT ||
150 Dir == Dependence::DVEntry::GE)
151 Direction = '>';
152 else if (Dir == Dependence::DVEntry::EQ)
153 Direction = '=';
154 else
155 Direction = '*';
156 Dep.push_back(Direction);
157 }
158 }
159 while (Dep.size() != Level) {
160 Dep.push_back('I');
161 }
162
163 DepMatrix.push_back(Dep);
164 if (DepMatrix.size() > MaxMemInstrCount) {
165 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
166 << " dependencies inside loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
;
167 return false;
168 }
169 }
170 }
171 }
172
173 return true;
174}
175
176// A loop is moved from index 'from' to an index 'to'. Update the Dependence
177// matrix by exchanging the two columns.
178static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
179 unsigned ToIndx) {
180 for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
181 std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
182}
183
184// Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
185// '>'
186static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
187 unsigned Column) {
188 for (unsigned i = 0; i <= Column; ++i) {
189 if (DepMatrix[Row][i] == '<')
190 return false;
191 if (DepMatrix[Row][i] == '>')
192 return true;
193 }
194 // All dependencies were '=','S' or 'I'
195 return false;
196}
197
198// Checks if no dependence exist in the dependency matrix in Row before Column.
199static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
200 unsigned Column) {
201 for (unsigned i = 0; i < Column; ++i) {
202 if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
203 DepMatrix[Row][i] != 'I')
204 return false;
205 }
206 return true;
207}
208
209static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
210 unsigned OuterLoopId, char InnerDep,
211 char OuterDep) {
212 if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
213 return false;
214
215 if (InnerDep == OuterDep)
216 return true;
217
218 // It is legal to interchange if and only if after interchange no row has a
219 // '>' direction as the leftmost non-'='.
220
221 if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
222 return true;
223
224 if (InnerDep == '<')
225 return true;
226
227 if (InnerDep == '>') {
228 // If OuterLoopId represents outermost loop then interchanging will make the
229 // 1st dependency as '>'
230 if (OuterLoopId == 0)
231 return false;
232
233 // If all dependencies before OuterloopId are '=','S'or 'I'. Then
234 // interchanging will result in this row having an outermost non '='
235 // dependency of '>'
236 if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
237 return true;
238 }
239
240 return false;
241}
242
243// Checks if it is legal to interchange 2 loops.
244// [Theorem] A permutation of the loops in a perfect nest is legal if and only
245// if the direction matrix, after the same permutation is applied to its
246// columns, has no ">" direction as the leftmost non-"=" direction in any row.
247static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
248 unsigned InnerLoopId,
249 unsigned OuterLoopId) {
250 unsigned NumRows = DepMatrix.size();
251 // For each row check if it is valid to interchange.
252 for (unsigned Row = 0; Row < NumRows; ++Row) {
253 char InnerDep = DepMatrix[Row][InnerLoopId];
254 char OuterDep = DepMatrix[Row][OuterLoopId];
255 if (InnerDep == '*' || OuterDep == '*')
256 return false;
257 if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
258 return false;
259 }
260 return true;
261}
262
263static void populateWorklist(Loop &L, LoopVector &LoopList) {
264 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
265 << L.getHeader()->getParent()->getName() << " Loop: %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
266 << L.getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
;
267 assert(LoopList.empty() && "LoopList should initially be empty!")(static_cast <bool> (LoopList.empty() && "LoopList should initially be empty!"
) ? void (0) : __assert_fail ("LoopList.empty() && \"LoopList should initially be empty!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 267, __extension__
__PRETTY_FUNCTION__))
;
268 Loop *CurrentLoop = &L;
269 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
270 while (!Vec->empty()) {
271 // The current loop has multiple subloops in it hence it is not tightly
272 // nested.
273 // Discard all loops above it added into Worklist.
274 if (Vec->size() != 1) {
275 LoopList = {};
276 return;
277 }
278
279 LoopList.push_back(CurrentLoop);
280 CurrentLoop = Vec->front();
281 Vec = &CurrentLoop->getSubLoops();
282 }
283 LoopList.push_back(CurrentLoop);
284}
285
286namespace {
287
288/// LoopInterchangeLegality checks if it is legal to interchange the loop.
289class LoopInterchangeLegality {
290public:
291 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
292 OptimizationRemarkEmitter *ORE)
293 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
294
295 /// Check if the loops can be interchanged.
296 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
297 CharMatrix &DepMatrix);
298
299 /// Discover induction PHIs in the header of \p L. Induction
300 /// PHIs are added to \p Inductions.
301 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
302
303 /// Check if the loop structure is understood. We do not handle triangular
304 /// loops for now.
305 bool isLoopStructureUnderstood();
306
307 bool currentLimitations();
308
309 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
310 return OuterInnerReductions;
311 }
312
313 const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
314 return InnerLoopInductions;
315 }
316
317private:
318 bool tightlyNested(Loop *Outer, Loop *Inner);
319 bool containsUnsafeInstructions(BasicBlock *BB);
320
321 /// Discover induction and reduction PHIs in the header of \p L. Induction
322 /// PHIs are added to \p Inductions, reductions are added to
323 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
324 /// to be passed as \p InnerLoop.
325 bool findInductionAndReductions(Loop *L,
326 SmallVector<PHINode *, 8> &Inductions,
327 Loop *InnerLoop);
328
329 Loop *OuterLoop;
330 Loop *InnerLoop;
331
332 ScalarEvolution *SE;
333
334 /// Interface to emit optimization remarks.
335 OptimizationRemarkEmitter *ORE;
336
337 /// Set of reduction PHIs taking part of a reduction across the inner and
338 /// outer loop.
339 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
340
341 /// Set of inner loop induction PHIs
342 SmallVector<PHINode *, 8> InnerLoopInductions;
343};
344
345/// LoopInterchangeProfitability checks if it is profitable to interchange the
346/// loop.
347class LoopInterchangeProfitability {
348public:
349 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
350 OptimizationRemarkEmitter *ORE)
351 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
352
353 /// Check if the loop interchange is profitable.
354 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
355 unsigned InnerLoopId, unsigned OuterLoopId,
356 CharMatrix &DepMatrix,
357 const DenseMap<const Loop *, unsigned> &CostMap);
358
359private:
360 int getInstrOrderCost();
361
362 Loop *OuterLoop;
363 Loop *InnerLoop;
364
365 /// Scev analysis.
366 ScalarEvolution *SE;
367
368 /// Interface to emit optimization remarks.
369 OptimizationRemarkEmitter *ORE;
370};
371
372/// LoopInterchangeTransform interchanges the loop.
373class LoopInterchangeTransform {
374public:
375 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
376 LoopInfo *LI, DominatorTree *DT,
377 const LoopInterchangeLegality &LIL)
378 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
379
380 /// Interchange OuterLoop and InnerLoop.
381 bool transform();
382 void restructureLoops(Loop *NewInner, Loop *NewOuter,
383 BasicBlock *OrigInnerPreHeader,
384 BasicBlock *OrigOuterPreHeader);
385 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
386
387private:
388 bool adjustLoopLinks();
389 bool adjustLoopBranches();
390
391 Loop *OuterLoop;
392 Loop *InnerLoop;
393
394 /// Scev analysis.
395 ScalarEvolution *SE;
396
397 LoopInfo *LI;
398 DominatorTree *DT;
399
400 const LoopInterchangeLegality &LIL;
401};
402
403struct LoopInterchange {
404 ScalarEvolution *SE = nullptr;
405 LoopInfo *LI = nullptr;
406 DependenceInfo *DI = nullptr;
407 DominatorTree *DT = nullptr;
408 std::unique_ptr<CacheCost> CC = nullptr;
409
410 /// Interface to emit optimization remarks.
411 OptimizationRemarkEmitter *ORE;
412
413 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
414 DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
415 OptimizationRemarkEmitter *ORE)
416 : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
417
418 bool run(Loop *L) {
419 if (L->getParentLoop())
420 return false;
421 SmallVector<Loop *, 8> LoopList;
422 populateWorklist(*L, LoopList);
423 return processLoopList(LoopList);
424 }
425
426 bool run(LoopNest &LN) {
427 SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
428 for (unsigned I = 1; I < LoopList.size(); ++I)
429 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
430 return false;
431 return processLoopList(LoopList);
432 }
433
434 bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
435 for (Loop *L : LoopList) {
436 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
437 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
438 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Couldn't compute backedge count\n"
; } } while (false)
;
439 return false;
440 }
441 if (L->getNumBackEdges() != 1) {
442 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "NumBackEdges is not equal to 1\n"
; } } while (false)
;
443 return false;
444 }
445 if (!L->getExitingBlock()) {
446 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't have unique exit block\n"
; } } while (false)
;
447 return false;
448 }
449 }
450 return true;
451 }
452
453 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
454 // TODO: Add a better heuristic to select the loop to be interchanged based
455 // on the dependence matrix. Currently we select the innermost loop.
456 return LoopList.size() - 1;
457 }
458
459 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
460 bool Changed = false;
461 unsigned LoopNestDepth = LoopList.size();
462 if (LoopNestDepth < 2) {
463 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't contain minimum nesting level.\n"
; } } while (false)
;
464 return false;
465 }
466 if (LoopNestDepth > MaxLoopNestDepth) {
467 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
468 << MaxLoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
;
469 return false;
470 }
471 if (!isComputableLoopNest(LoopList)) {
472 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not valid loop candidate for interchange\n"
; } } while (false)
;
473 return false;
474 }
475
476 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepthdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
477 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
;
478
479 CharMatrix DependencyMatrix;
480 Loop *OuterMostLoop = *(LoopList.begin());
481 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
482 OuterMostLoop, DI, SE)) {
483 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Populating dependency matrix failed\n"
; } } while (false)
;
484 return false;
485 }
486#ifdef DUMP_DEP_MATRICIES
487 LLVM_DEBUG(dbgs() << "Dependence before interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence before interchange\n"
; } } while (false)
;
488 printDepMatrix(DependencyMatrix);
489#endif
490
491 // Get the Outermost loop exit.
492 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
493 if (!LoopNestExit) {
494 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "OuterMostLoop needs an unique exit block"
; } } while (false)
;
495 return false;
496 }
497
498 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
499 // Obtain the loop vector returned from loop cache analysis beforehand,
500 // and put each <Loop, index> pair into a map for constant time query
501 // later. Indices in loop vector reprsent the optimal order of the
502 // corresponding loop, e.g., given a loopnest with depth N, index 0
503 // indicates the loop should be placed as the outermost loop and index N
504 // indicates the loop should be placed as the innermost loop.
505 //
506 // For the old pass manager CacheCost would be null.
507 DenseMap<const Loop *, unsigned> CostMap;
508 if (CC != nullptr) {
509 const auto &LoopCosts = CC->getLoopCosts();
510 for (unsigned i = 0; i < LoopCosts.size(); i++) {
511 CostMap[LoopCosts[i].first] = i;
512 }
513 }
514 // We try to achieve the globally optimal memory access for the loopnest,
515 // and do interchange based on a bubble-sort fasion. We start from
516 // the innermost loop, move it outwards to the best possible position
517 // and repeat this process.
518 for (unsigned j = SelecLoopId; j > 0; j--) {
519 bool ChangedPerIter = false;
520 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
521 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
522 DependencyMatrix, CostMap);
523 if (!Interchanged)
524 continue;
525 // Loops interchanged, update LoopList accordingly.
526 std::swap(LoopList[i - 1], LoopList[i]);
527 // Update the DependencyMatrix
528 interChangeDependencies(DependencyMatrix, i, i - 1);
529#ifdef DUMP_DEP_MATRICIES
530 LLVM_DEBUG(dbgs() << "Dependence after interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence after interchange\n"
; } } while (false)
;
531 printDepMatrix(DependencyMatrix);
532#endif
533 ChangedPerIter |= Interchanged;
534 Changed |= Interchanged;
535 }
536 // Early abort if there was no interchange during an entire round of
537 // moving loops outwards.
538 if (!ChangedPerIter)
539 break;
540 }
541 return Changed;
542 }
543
544 bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
545 unsigned OuterLoopId,
546 std::vector<std::vector<char>> &DependencyMatrix,
547 const DenseMap<const Loop *, unsigned> &CostMap) {
548 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
549 << " and OuterLoopId = " << OuterLoopId << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
;
550 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
551 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
552 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not interchanging loops. Cannot prove legality.\n"
; } } while (false)
;
553 return false;
554 }
555 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are legal to interchange\n"
; } } while (false)
;
556 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
557 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
558 DependencyMatrix, CostMap)) {
559 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Interchanging loops not profitable.\n"
; } } while (false)
;
560 return false;
561 }
562
563 ORE->emit([&]() {
564 return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged",
565 InnerLoop->getStartLoc(),
566 InnerLoop->getHeader())
567 << "Loop interchanged with enclosing loop.";
568 });
569
570 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
571 LIT.transform();
572 LLVM_DEBUG(dbgs() << "Loops interchanged.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops interchanged.\n"
; } } while (false)
;
573 LoopsInterchanged++;
574
575 assert(InnerLoop->isLCSSAForm(*DT) &&(static_cast <bool> (InnerLoop->isLCSSAForm(*DT) &&
"Inner loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("InnerLoop->isLCSSAForm(*DT) && \"Inner loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 576, __extension__
__PRETTY_FUNCTION__))
576 "Inner loop not left in LCSSA form after loop interchange!")(static_cast <bool> (InnerLoop->isLCSSAForm(*DT) &&
"Inner loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("InnerLoop->isLCSSAForm(*DT) && \"Inner loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 576, __extension__
__PRETTY_FUNCTION__))
;
577 assert(OuterLoop->isLCSSAForm(*DT) &&(static_cast <bool> (OuterLoop->isLCSSAForm(*DT) &&
"Outer loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("OuterLoop->isLCSSAForm(*DT) && \"Outer loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 578, __extension__
__PRETTY_FUNCTION__))
578 "Outer loop not left in LCSSA form after loop interchange!")(static_cast <bool> (OuterLoop->isLCSSAForm(*DT) &&
"Outer loop not left in LCSSA form after loop interchange!")
? void (0) : __assert_fail ("OuterLoop->isLCSSAForm(*DT) && \"Outer loop not left in LCSSA form after loop interchange!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 578, __extension__
__PRETTY_FUNCTION__))
;
579
580 return true;
581 }
582};
583
584} // end anonymous namespace
585
586bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
587 return any_of(*BB, [](const Instruction &I) {
588 return I.mayHaveSideEffects() || I.mayReadFromMemory();
589 });
590}
591
592bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
593 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
594 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
595 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
596
597 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking if loops are tightly nested\n"
; } } while (false)
;
598
599 // A perfectly nested loop will not have any branch in between the outer and
600 // inner block i.e. outer header will branch to either inner preheader and
601 // outerloop latch.
602 BranchInst *OuterLoopHeaderBI =
603 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
604 if (!OuterLoopHeaderBI)
605 return false;
606
607 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
608 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
609 Succ != OuterLoopLatch)
610 return false;
611
612 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking instructions in Loop header and Loop latch\n"
; } } while (false)
;
613 // We do not have any basic block in between now make sure the outer header
614 // and outer loop latch doesn't contain any unsafe instructions.
615 if (containsUnsafeInstructions(OuterLoopHeader) ||
616 containsUnsafeInstructions(OuterLoopLatch))
617 return false;
618
619 // Also make sure the inner loop preheader does not contain any unsafe
620 // instructions. Note that all instructions in the preheader will be moved to
621 // the outer loop header when interchanging.
622 if (InnerLoopPreHeader != OuterLoopHeader &&
623 containsUnsafeInstructions(InnerLoopPreHeader))
624 return false;
625
626 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
627 // Ensure the inner loop exit block flows to the outer loop latch possibly
628 // through empty blocks.
629 const BasicBlock &SuccInner =
630 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
631 if (&SuccInner != OuterLoopLatch) {
632 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExitdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
633 << " does not lead to the outer loop latch.\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
;
634 return false;
635 }
636 // The inner loop exit block does flow to the outer loop latch and not some
637 // other BBs, now make sure it contains safe instructions, since it will be
638 // moved into the (new) inner loop after interchange.
639 if (containsUnsafeInstructions(InnerLoopExit))
640 return false;
641
642 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are perfectly nested\n"
; } } while (false)
;
643 // We have a perfect loop nest.
644 return true;
645}
646
647bool LoopInterchangeLegality::isLoopStructureUnderstood() {
648 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
649 for (PHINode *InnerInduction : InnerLoopInductions) {
650 unsigned Num = InnerInduction->getNumOperands();
651 for (unsigned i = 0; i < Num; ++i) {
652 Value *Val = InnerInduction->getOperand(i);
653 if (isa<Constant>(Val))
654 continue;
655 Instruction *I = dyn_cast<Instruction>(Val);
656 if (!I)
657 return false;
658 // TODO: Handle triangular loops.
659 // e.g. for(int i=0;i<N;i++)
660 // for(int j=i;j<N;j++)
661 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
662 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
663 InnerLoopPreheader &&
664 !OuterLoop->isLoopInvariant(I)) {
665 return false;
666 }
667 }
668 }
669
670 // TODO: Handle triangular loops of another form.
671 // e.g. for(int i=0;i<N;i++)
672 // for(int j=0;j<i;j++)
673 // or,
674 // for(int i=0;i<N;i++)
675 // for(int j=0;j*i<N;j++)
676 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
677 BranchInst *InnerLoopLatchBI =
678 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
679 if (!InnerLoopLatchBI->isConditional())
680 return false;
681 if (CmpInst *InnerLoopCmp =
682 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
683 Value *Op0 = InnerLoopCmp->getOperand(0);
684 Value *Op1 = InnerLoopCmp->getOperand(1);
685
686 // LHS and RHS of the inner loop exit condition, e.g.,
687 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
688 Value *Left = nullptr;
689 Value *Right = nullptr;
690
691 // Check if V only involves inner loop induction variable.
692 // Return true if V is InnerInduction, or a cast from
693 // InnerInduction, or a binary operator that involves
694 // InnerInduction and a constant.
695 std::function<bool(Value *)> IsPathToInnerIndVar;
696 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
697 if (llvm::is_contained(InnerLoopInductions, V))
698 return true;
699 if (isa<Constant>(V))
700 return true;
701 const Instruction *I = dyn_cast<Instruction>(V);
702 if (!I)
703 return false;
704 if (isa<CastInst>(I))
705 return IsPathToInnerIndVar(I->getOperand(0));
706 if (isa<BinaryOperator>(I))
707 return IsPathToInnerIndVar(I->getOperand(0)) &&
708 IsPathToInnerIndVar(I->getOperand(1));
709 return false;
710 };
711
712 // In case of multiple inner loop indvars, it is okay if LHS and RHS
713 // are both inner indvar related variables.
714 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
715 return true;
716
717 // Otherwise we check if the cmp instruction compares an inner indvar
718 // related variable (Left) with a outer loop invariant (Right).
719 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
720 Left = Op0;
721 Right = Op1;
722 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
723 Left = Op1;
724 Right = Op0;
725 }
726
727 if (Left == nullptr)
728 return false;
729
730 const SCEV *S = SE->getSCEV(Right);
731 if (!SE->isLoopInvariant(S, OuterLoop))
732 return false;
733 }
734
735 return true;
736}
737
738// If SV is a LCSSA PHI node with a single incoming value, return the incoming
739// value.
740static Value *followLCSSA(Value *SV) {
741 PHINode *PHI = dyn_cast<PHINode>(SV);
742 if (!PHI)
743 return SV;
744
745 if (PHI->getNumIncomingValues() != 1)
746 return SV;
747 return followLCSSA(PHI->getIncomingValue(0));
748}
749
750// Check V's users to see if it is involved in a reduction in L.
751static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
752 // Reduction variables cannot be constants.
753 if (isa<Constant>(V))
754 return nullptr;
755
756 for (Value *User : V->users()) {
757 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
758 if (PHI->getNumIncomingValues() == 1)
759 continue;
760 RecurrenceDescriptor RD;
761 if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
762 // Detect floating point reduction only when it can be reordered.
763 if (RD.getExactFPMathInst() != nullptr)
764 return nullptr;
765 return PHI;
766 }
767 return nullptr;
768 }
769 }
770
771 return nullptr;
772}
773
774bool LoopInterchangeLegality::findInductionAndReductions(
775 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
776 if (!L->getLoopLatch() || !L->getLoopPredecessor())
777 return false;
778 for (PHINode &PHI : L->getHeader()->phis()) {
779 RecurrenceDescriptor RD;
780 InductionDescriptor ID;
781 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
782 Inductions.push_back(&PHI);
783 else {
784 // PHIs in inner loops need to be part of a reduction in the outer loop,
785 // discovered when checking the PHIs of the outer loop earlier.
786 if (!InnerLoop) {
787 if (!OuterInnerReductions.count(&PHI)) {
788 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
789 "across the outer loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
;
790 return false;
791 }
792 } else {
793 assert(PHI.getNumIncomingValues() == 2 &&(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 794, __extension__
__PRETTY_FUNCTION__))
794 "Phis in loop header should have exactly 2 incoming values")(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 794, __extension__
__PRETTY_FUNCTION__))
;
795 // Check if we have a PHI node in the outer loop that has a reduction
796 // result from the inner loop as an incoming value.
797 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
798 PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
799 if (!InnerRedPhi ||
800 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
801 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
802 dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
803 << "Failed to recognize PHI as an induction or reduction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
;
804 return false;
805 }
806 OuterInnerReductions.insert(&PHI);
807 OuterInnerReductions.insert(InnerRedPhi);
808 }
809 }
810 }
811 return true;
812}
813
814// This function indicates the current limitations in the transform as a result
815// of which we do not proceed.
816bool LoopInterchangeLegality::currentLimitations() {
817 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
818
819 // transform currently expects the loop latches to also be the exiting
820 // blocks.
821 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
822 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
823 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
824 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
825 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
826 dbgs() << "Loops where the latch is not the exiting block are not"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
827 << " supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
;
828 ORE->emit([&]() {
829 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ExitingNotLatch",
830 OuterLoop->getStartLoc(),
831 OuterLoop->getHeader())
832 << "Loops where the latch is not the exiting block cannot be"
833 " interchange currently.";
834 });
835 return true;
836 }
837
838 SmallVector<PHINode *, 8> Inductions;
839 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
840 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
841 dbgs() << "Only outer loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
842 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
843 ORE->emit([&]() {
844 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter",
845 OuterLoop->getStartLoc(),
846 OuterLoop->getHeader())
847 << "Only outer loops with induction or reduction PHI nodes can be"
848 " interchanged currently.";
849 });
850 return true;
851 }
852
853 Inductions.clear();
854 if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
855 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
856 dbgs() << "Only inner loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
857 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
858 ORE->emit([&]() {
859 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner",
860 InnerLoop->getStartLoc(),
861 InnerLoop->getHeader())
862 << "Only inner loops with induction or reduction PHI nodes can be"
863 " interchange currently.";
864 });
865 return true;
866 }
867
868 // TODO: Triangular loops are not handled for now.
869 if (!isLoopStructureUnderstood()) {
870 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop structure not understood by pass\n"
; } } while (false)
;
871 ORE->emit([&]() {
872 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner",
873 InnerLoop->getStartLoc(),
874 InnerLoop->getHeader())
875 << "Inner loop structure not understood currently.";
876 });
877 return true;
878 }
879
880 return false;
881}
882
883bool LoopInterchangeLegality::findInductions(
884 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
885 for (PHINode &PHI : L->getHeader()->phis()) {
886 InductionDescriptor ID;
887 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
888 Inductions.push_back(&PHI);
889 }
890 return !Inductions.empty();
891}
892
893// We currently only support LCSSA PHI nodes in the inner loop exit, if their
894// users are either reduction PHIs or PHIs outside the outer loop (which means
895// the we are only interested in the final value after the loop).
896static bool
897areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
898 SmallPtrSetImpl<PHINode *> &Reductions) {
899 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
900 for (PHINode &PHI : InnerExit->phis()) {
901 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
902 if (PHI.getNumIncomingValues() > 1)
903 return false;
904 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
905 PHINode *PN = dyn_cast<PHINode>(U);
906 return !PN ||
907 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
908 })) {
909 return false;
910 }
911 }
912 return true;
913}
914
915// We currently support LCSSA PHI nodes in the outer loop exit, if their
916// incoming values do not come from the outer loop latch or if the
917// outer loop latch has a single predecessor. In that case, the value will
918// be available if both the inner and outer loop conditions are true, which
919// will still be true after interchanging. If we have multiple predecessor,
920// that may not be the case, e.g. because the outer loop latch may be executed
921// if the inner loop is not executed.
922static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
923 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
924 for (PHINode &PHI : LoopNestExit->phis()) {
925 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
926 Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
927 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
928 continue;
929
930 // The incoming value is defined in the outer loop latch. Currently we
931 // only support that in case the outer loop latch has a single predecessor.
932 // This guarantees that the outer loop latch is executed if and only if
933 // the inner loop is executed (because tightlyNested() guarantees that the
934 // outer loop header only branches to the inner loop or the outer loop
935 // latch).
936 // FIXME: We could weaken this logic and allow multiple predecessors,
937 // if the values are produced outside the loop latch. We would need
938 // additional logic to update the PHI nodes in the exit block as
939 // well.
940 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
941 return false;
942 }
943 }
944 return true;
945}
946
947// In case of multi-level nested loops, it may occur that lcssa phis exist in
948// the latch of InnerLoop, i.e., when defs of the incoming values are further
949// inside the loopnest. Sometimes those incoming values are not available
950// after interchange, since the original inner latch will become the new outer
951// latch which may have predecessor paths that do not include those incoming
952// values.
953// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
954// multi-level loop nests.
955static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
956 if (InnerLoop->getSubLoops().empty())
957 return true;
958 // If the original outer latch has only one predecessor, then values defined
959 // further inside the looploop, e.g., in the innermost loop, will be available
960 // at the new outer latch after interchange.
961 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
962 return true;
963
964 // The outer latch has more than one predecessors, i.e., the inner
965 // exit and the inner header.
966 // PHI nodes in the inner latch are lcssa phis where the incoming values
967 // are defined further inside the loopnest. Check if those phis are used
968 // in the original inner latch. If that is the case then bail out since
969 // those incoming values may not be available at the new outer latch.
970 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
971 for (PHINode &PHI : InnerLoopLatch->phis()) {
972 for (auto *U : PHI.users()) {
973 Instruction *UI = cast<Instruction>(U);
974 if (InnerLoopLatch == UI->getParent())
975 return false;
976 }
977 }
978 return true;
979}
980
981bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
982 unsigned OuterLoopId,
983 CharMatrix &DepMatrix) {
984 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
985 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
986 << " and OuterLoopId = " << OuterLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
987 << " due to dependence\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
;
988 ORE->emit([&]() {
989 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence",
990 InnerLoop->getStartLoc(),
991 InnerLoop->getHeader())
992 << "Cannot interchange loops due to dependences.";
993 });
994 return false;
995 }
996 // Check if outer and inner loop contain legal instructions only.
997 for (auto *BB : OuterLoop->blocks())
998 for (Instruction &I : BB->instructionsWithoutDebug())
999 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1000 // readnone functions do not prevent interchanging.
1001 if (CI->onlyWritesMemory())
1002 continue;
1003 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
1004 dbgs() << "Loops with call instructions cannot be interchanged "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
1005 << "safely.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
;
1006 ORE->emit([&]() {
1007 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "CallInst",
1008 CI->getDebugLoc(),
1009 CI->getParent())
1010 << "Cannot interchange loops due to call instruction.";
1011 });
1012
1013 return false;
1014 }
1015
1016 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1017 LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cound not find inner loop induction variables.\n"
; } } while (false)
;
1018 return false;
1019 }
1020
1021 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
1022 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"
; } } while (false)
;
1023 ORE->emit([&]() {
1024 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedInnerLatchPHI",
1025 InnerLoop->getStartLoc(),
1026 InnerLoop->getHeader())
1027 << "Cannot interchange loops because unsupported PHI nodes found "
1028 "in inner loop latch.";
1029 });
1030 return false;
1031 }
1032
1033 // TODO: The loops could not be interchanged due to current limitations in the
1034 // transform module.
1035 if (currentLimitations()) {
1036 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not legal because of current transform limitation\n"
; } } while (false)
;
1037 return false;
1038 }
1039
1040 // Check if the loops are tightly nested.
1041 if (!tightlyNested(OuterLoop, InnerLoop)) {
1042 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops not tightly nested\n"
; } } while (false)
;
1043 ORE->emit([&]() {
1044 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested",
1045 InnerLoop->getStartLoc(),
1046 InnerLoop->getHeader())
1047 << "Cannot interchange loops because they are not tightly "
1048 "nested.";
1049 });
1050 return false;
1051 }
1052
1053 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1054 OuterInnerReductions)) {
1055 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"
; } } while (false)
;
1056 ORE->emit([&]() {
1057 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1058 InnerLoop->getStartLoc(),
1059 InnerLoop->getHeader())
1060 << "Found unsupported PHI node in loop exit.";
1061 });
1062 return false;
1063 }
1064
1065 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1066 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"
; } } while (false)
;
1067 ORE->emit([&]() {
1068 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1069 OuterLoop->getStartLoc(),
1070 OuterLoop->getHeader())
1071 << "Found unsupported PHI node in loop exit.";
1072 });
1073 return false;
1074 }
1075
1076 return true;
1077}
1078
1079int LoopInterchangeProfitability::getInstrOrderCost() {
1080 unsigned GoodOrder, BadOrder;
1081 BadOrder = GoodOrder = 0;
1082 for (BasicBlock *BB : InnerLoop->blocks()) {
1083 for (Instruction &Ins : *BB) {
1084 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1085 unsigned NumOp = GEP->getNumOperands();
1086 bool FoundInnerInduction = false;
1087 bool FoundOuterInduction = false;
1088 for (unsigned i = 0; i < NumOp; ++i) {
1089 // Skip operands that are not SCEV-able.
1090 if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1091 continue;
1092
1093 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1094 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1095 if (!AR)
1096 continue;
1097
1098 // If we find the inner induction after an outer induction e.g.
1099 // for(int i=0;i<N;i++)
1100 // for(int j=0;j<N;j++)
1101 // A[i][j] = A[i-1][j-1]+k;
1102 // then it is a good order.
1103 if (AR->getLoop() == InnerLoop) {
1104 // We found an InnerLoop induction after OuterLoop induction. It is
1105 // a good order.
1106 FoundInnerInduction = true;
1107 if (FoundOuterInduction) {
1108 GoodOrder++;
1109 break;
1110 }
1111 }
1112 // If we find the outer induction after an inner induction e.g.
1113 // for(int i=0;i<N;i++)
1114 // for(int j=0;j<N;j++)
1115 // A[j][i] = A[j-1][i-1]+k;
1116 // then it is a bad order.
1117 if (AR->getLoop() == OuterLoop) {
1118 // We found an OuterLoop induction after InnerLoop induction. It is
1119 // a bad order.
1120 FoundOuterInduction = true;
1121 if (FoundInnerInduction) {
1122 BadOrder++;
1123 break;
1124 }
1125 }
1126 }
1127 }
1128 }
1129 }
1130 return GoodOrder - BadOrder;
1131}
1132
1133static bool isProfitableForVectorization(unsigned InnerLoopId,
1134 unsigned OuterLoopId,
1135 CharMatrix &DepMatrix) {
1136 // TODO: Improve this heuristic to catch more cases.
1137 // If the inner loop is loop independent or doesn't carry any dependency it is
1138 // profitable to move this to outer position.
1139 for (auto &Row : DepMatrix) {
1140 if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1141 return false;
1142 // TODO: We need to improve this heuristic.
1143 if (Row[OuterLoopId] != '=')
1144 return false;
1145 }
1146 // If outer loop has dependence and inner loop is loop independent then it is
1147 // profitable to interchange to enable parallelism.
1148 // If there are no dependences, interchanging will not improve anything.
1149 return !DepMatrix.empty();
1150}
1151
1152bool LoopInterchangeProfitability::isProfitable(
1153 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1154 unsigned OuterLoopId, CharMatrix &DepMatrix,
1155 const DenseMap<const Loop *, unsigned> &CostMap) {
1156 // TODO: Remove the legacy cost model.
1157
1158 // This is the new cost model returned from loop cache analysis.
1159 // A smaller index means the loop should be placed an outer loop, and vice
1160 // versa.
1161 if (CostMap.find(InnerLoop) != CostMap.end() &&
1162 CostMap.find(OuterLoop) != CostMap.end()) {
1163 unsigned InnerIndex = 0, OuterIndex = 0;
1164 InnerIndex = CostMap.find(InnerLoop)->second;
1165 OuterIndex = CostMap.find(OuterLoop)->second;
1166 LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndexdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "InnerIndex = " <<
InnerIndex << ", OuterIndex = " << OuterIndex <<
"\n"; } } while (false)
1167 << ", OuterIndex = " << OuterIndex << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "InnerIndex = " <<
InnerIndex << ", OuterIndex = " << OuterIndex <<
"\n"; } } while (false)
;
1168 if (InnerIndex < OuterIndex)
1169 return true;
1170 } else {
1171 // Legacy cost model: this is rough cost estimation algorithm. It counts the
1172 // good and bad order of induction variables in the instruction and allows
1173 // reordering if number of bad orders is more than good.
1174 int Cost = getInstrOrderCost();
1175 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cost = " << Cost
<< "\n"; } } while (false)
;
1176 if (Cost < -LoopInterchangeCostThreshold)
1177 return true;
1178 }
1179
1180 // It is not profitable as per current cache profitability model. But check if
1181 // we can move this loop outside to improve parallelism.
1182 if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1183 return true;
1184
1185 ORE->emit([&]() {
1186 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable",
1187 InnerLoop->getStartLoc(),
1188 InnerLoop->getHeader())
1189 << "Interchanging loops is too costly and it does not improve "
1190 "parallelism.";
1191 });
1192 return false;
1193}
1194
1195void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1196 Loop *InnerLoop) {
1197 for (Loop *L : *OuterLoop)
1198 if (L == InnerLoop) {
1199 OuterLoop->removeChildLoop(L);
1200 return;
1201 }
1202 llvm_unreachable("Couldn't find loop")::llvm::llvm_unreachable_internal("Couldn't find loop", "llvm/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1202)
;
1203}
1204
1205/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1206/// new inner and outer loop after interchanging: NewInner is the original
1207/// outer loop and NewOuter is the original inner loop.
1208///
1209/// Before interchanging, we have the following structure
1210/// Outer preheader
1211// Outer header
1212// Inner preheader
1213// Inner header
1214// Inner body
1215// Inner latch
1216// outer bbs
1217// Outer latch
1218//
1219// After interchanging:
1220// Inner preheader
1221// Inner header
1222// Outer preheader
1223// Outer header
1224// Inner body
1225// outer bbs
1226// Outer latch
1227// Inner latch
1228void LoopInterchangeTransform::restructureLoops(
1229 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1230 BasicBlock *OrigOuterPreHeader) {
1231 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1232 // The original inner loop preheader moves from the new inner loop to
1233 // the parent loop, if there is one.
1234 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1235 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1236
1237 // Switch the loop levels.
1238 if (OuterLoopParent) {
1239 // Remove the loop from its parent loop.
1240 removeChildLoop(OuterLoopParent, NewInner);
1241 removeChildLoop(NewInner, NewOuter);
1242 OuterLoopParent->addChildLoop(NewOuter);
1243 } else {
1244 removeChildLoop(NewInner, NewOuter);
1245 LI->changeTopLevelLoop(NewInner, NewOuter);
1246 }
1247 while (!NewOuter->isInnermost())
1248 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1249 NewOuter->addChildLoop(NewInner);
1250
1251 // BBs from the original inner loop.
1252 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1253
1254 // Add BBs from the original outer loop to the original inner loop (excluding
1255 // BBs already in inner loop)
1256 for (BasicBlock *BB : NewInner->blocks())
1257 if (LI->getLoopFor(BB) == NewInner)
1258 NewOuter->addBlockEntry(BB);
1259
1260 // Now remove inner loop header and latch from the new inner loop and move
1261 // other BBs (the loop body) to the new inner loop.
1262 BasicBlock *OuterHeader = NewOuter->getHeader();
1263 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1264 for (BasicBlock *BB : OrigInnerBBs) {
1265 // Nothing will change for BBs in child loops.
1266 if (LI->getLoopFor(BB) != NewOuter)
1267 continue;
1268 // Remove the new outer loop header and latch from the new inner loop.
1269 if (BB == OuterHeader || BB == OuterLatch)
1270 NewInner->removeBlockFromLoop(BB);
1271 else
1272 LI->changeLoopFor(BB, NewInner);
1273 }
1274
1275 // The preheader of the original outer loop becomes part of the new
1276 // outer loop.
1277 NewOuter->addBlockEntry(OrigOuterPreHeader);
1278 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1279
1280 // Tell SE that we move the loops around.
1281 SE->forgetLoop(NewOuter);
1282 SE->forgetLoop(NewInner);
1283}
1284
1285bool LoopInterchangeTransform::transform() {
1286 bool Transformed = false;
1287
1288 if (InnerLoop->getSubLoops().empty()) {
1289 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1290 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Splitting the inner loop latch\n"
; } } while (false)
;
1291 auto &InductionPHIs = LIL.getInnerLoopInductions();
1292 if (InductionPHIs.empty()) {
1293 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to find the point to split loop latch \n"
; } } while (false)
;
1294 return false;
1295 }
1296
1297 SmallVector<Instruction *, 8> InnerIndexVarList;
1298 for (PHINode *CurInductionPHI : InductionPHIs) {
1299 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1300 InnerIndexVarList.push_back(
1301 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1302 else
1303 InnerIndexVarList.push_back(
1304 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1305 }
1306
1307 // Create a new latch block for the inner loop. We split at the
1308 // current latch's terminator and then move the condition and all
1309 // operands that are not either loop-invariant or the induction PHI into the
1310 // new latch block.
1311 BasicBlock *NewLatch =
1312 SplitBlock(InnerLoop->getLoopLatch(),
1313 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1314
1315 SmallSetVector<Instruction *, 4> WorkList;
1316 unsigned i = 0;
1317 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1318 for (; i < WorkList.size(); i++) {
1319 // Duplicate instruction and move it the new latch. Update uses that
1320 // have been moved.
1321 Instruction *NewI = WorkList[i]->clone();
1322 NewI->insertBefore(NewLatch->getFirstNonPHI());
1323 assert(!NewI->mayHaveSideEffects() &&(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1325, __extension__
__PRETTY_FUNCTION__))
1324 "Moving instructions with side-effects may change behavior of "(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1325, __extension__
__PRETTY_FUNCTION__))
1325 "the loop nest!")(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1325, __extension__
__PRETTY_FUNCTION__))
;
1326 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1327 Instruction *UserI = cast<Instruction>(U.getUser());
1328 if (!InnerLoop->contains(UserI->getParent()) ||
1329 UserI->getParent() == NewLatch ||
1330 llvm::is_contained(InductionPHIs, UserI))
1331 U.set(NewI);
1332 }
1333 // Add operands of moved instruction to the worklist, except if they are
1334 // outside the inner loop or are the induction PHI.
1335 for (Value *Op : WorkList[i]->operands()) {
1336 Instruction *OpI = dyn_cast<Instruction>(Op);
1337 if (!OpI ||
1338 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1339 llvm::is_contained(InductionPHIs, OpI))
1340 continue;
1341 WorkList.insert(OpI);
1342 }
1343 }
1344 };
1345
1346 // FIXME: Should we interchange when we have a constant condition?
1347 Instruction *CondI = dyn_cast<Instruction>(
1348 cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1349 ->getCondition());
1350 if (CondI)
1351 WorkList.insert(CondI);
1352 MoveInstructions();
1353 for (Instruction *InnerIndexVar : InnerIndexVarList)
1354 WorkList.insert(cast<Instruction>(InnerIndexVar));
1355 MoveInstructions();
1356
1357 // Splits the inner loops phi nodes out into a separate basic block.
1358 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1359 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1360 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "splitting InnerLoopHeader done\n"
; } } while (false)
;
1361 }
1362
1363 // Instructions in the original inner loop preheader may depend on values
1364 // defined in the outer loop header. Move them there, because the original
1365 // inner loop preheader will become the entry into the interchanged loop nest.
1366 // Currently we move all instructions and rely on LICM to move invariant
1367 // instructions outside the loop nest.
1368 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1369 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1370 if (InnerLoopPreHeader != OuterLoopHeader) {
1371 SmallPtrSet<Instruction *, 4> NeedsMoving;
1372 for (Instruction &I :
1373 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1374 std::prev(InnerLoopPreHeader->end()))))
1375 I.moveBefore(OuterLoopHeader->getTerminator());
1376 }
1377
1378 Transformed |= adjustLoopLinks();
1379 if (!Transformed) {
1380 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopLinks failed\n"
; } } while (false)
;
1381 return false;
1382 }
1383
1384 return true;
1385}
1386
1387/// \brief Move all instructions except the terminator from FromBB right before
1388/// InsertBefore
1389static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1390 auto &ToList = InsertBefore->getParent()->getInstList();
1391 auto &FromList = FromBB->getInstList();
1392
1393 ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1394 FromBB->getTerminator()->getIterator());
1395}
1396
1397/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1398static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1399 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1400 // from BB1 afterwards.
1401 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1402 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1403 for (Instruction *I : TempInstrs)
1404 I->removeFromParent();
1405
1406 // Move instructions from BB2 to BB1.
1407 moveBBContents(BB2, BB1->getTerminator());
1408
1409 // Move instructions from TempInstrs to BB2.
1410 for (Instruction *I : TempInstrs)
1411 I->insertBefore(BB2->getTerminator());
1412}
1413
1414// Update BI to jump to NewBB instead of OldBB. Records updates to the
1415// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1416// \p OldBB is exactly once in BI's successor list.
1417static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1418 BasicBlock *NewBB,
1419 std::vector<DominatorTree::UpdateType> &DTUpdates,
1420 bool MustUpdateOnce = true) {
1421 assert((!MustUpdateOnce ||(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1425, __extension__
__PRETTY_FUNCTION__))
1422 llvm::count_if(successors(BI),(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1425, __extension__
__PRETTY_FUNCTION__))
1423 [OldBB](BasicBlock *BB) {(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1425, __extension__
__PRETTY_FUNCTION__))
1424 return BB == OldBB;(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1425, __extension__
__PRETTY_FUNCTION__))
1425 }) == 1) && "BI must jump to OldBB exactly once.")(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1425, __extension__
__PRETTY_FUNCTION__))
;
1426 bool Changed = false;
1427 for (Use &Op : BI->operands())
1428 if (Op == OldBB) {
1429 Op.set(NewBB);
1430 Changed = true;
1431 }
1432
1433 if (Changed) {
1434 DTUpdates.push_back(
1435 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1436 DTUpdates.push_back(
1437 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1438 }
1439 assert(Changed && "Expected a successor to be updated")(static_cast <bool> (Changed && "Expected a successor to be updated"
) ? void (0) : __assert_fail ("Changed && \"Expected a successor to be updated\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1439, __extension__
__PRETTY_FUNCTION__))
;
1440}
1441
1442// Move Lcssa PHIs to the right place.
1443static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1444 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1445 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1446 Loop *InnerLoop, LoopInfo *LI) {
1447
1448 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1449 // defined either in the header or latch. Those blocks will become header and
1450 // latch of the new outer loop, and the only possible users can PHI nodes
1451 // in the exit block of the loop nest or the outer loop header (reduction
1452 // PHIs, in that case, the incoming value must be defined in the inner loop
1453 // header). We can just substitute the user with the incoming value and remove
1454 // the PHI.
1455 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1456 assert(P.getNumIncomingValues() == 1 &&(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1457, __extension__
__PRETTY_FUNCTION__))
1457 "Only loops with a single exit are supported!")(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1457, __extension__
__PRETTY_FUNCTION__))
;
1458
1459 // Incoming values are guaranteed be instructions currently.
1460 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1461 // In case of multi-level nested loops, follow LCSSA to find the incoming
1462 // value defined from the innermost loop.
1463 auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1464 // Skip phis with incoming values from the inner loop body, excluding the
1465 // header and latch.
1466 if (IncIInnerMost->getParent() != InnerLatch &&
1467 IncIInnerMost->getParent() != InnerHeader)
1468 continue;
1469
1470 assert(all_of(P.users(),(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1478, __extension__
__PRETTY_FUNCTION__))
1471 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1478, __extension__
__PRETTY_FUNCTION__))
1472 return (cast<PHINode>(U)->getParent() == OuterHeader &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1478, __extension__
__PRETTY_FUNCTION__))
1473 IncI->getParent() == InnerHeader) ||(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1478, __extension__
__PRETTY_FUNCTION__))
1474 cast<PHINode>(U)->getParent() == OuterExit;(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1478, __extension__
__PRETTY_FUNCTION__))
1475 }) &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1478, __extension__
__PRETTY_FUNCTION__))
1476 "Can only replace phis iff the uses are in the loop nest exit or "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1478, __extension__
__PRETTY_FUNCTION__))
1477 "the incoming value is defined in the inner header (it will "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1478, __extension__
__PRETTY_FUNCTION__))
1478 "dominate all loop blocks after interchanging)")(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1478, __extension__
__PRETTY_FUNCTION__))
;
1479 P.replaceAllUsesWith(IncI);
1480 P.eraseFromParent();
1481 }
1482
1483 SmallVector<PHINode *, 8> LcssaInnerExit;
1484 for (PHINode &P : InnerExit->phis())
1485 LcssaInnerExit.push_back(&P);
1486
1487 SmallVector<PHINode *, 8> LcssaInnerLatch;
1488 for (PHINode &P : InnerLatch->phis())
1489 LcssaInnerLatch.push_back(&P);
1490
1491 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1492 // If a PHI node has users outside of InnerExit, it has a use outside the
1493 // interchanged loop and we have to preserve it. We move these to
1494 // InnerLatch, which will become the new exit block for the innermost
1495 // loop after interchanging.
1496 for (PHINode *P : LcssaInnerExit)
1
Assuming '__begin1' is equal to '__end1'
1497 P->moveBefore(InnerLatch->getFirstNonPHI());
1498
1499 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1500 // and we have to move them to the new inner latch.
1501 for (PHINode *P : LcssaInnerLatch)
2
Assuming '__begin1' is equal to '__end1'
1502 P->moveBefore(InnerExit->getFirstNonPHI());
1503
1504 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1505 // incoming values defined in the outer loop, we have to add a new PHI
1506 // in the inner loop latch, which became the exit block of the outer loop,
1507 // after interchanging.
1508 if (OuterExit) {
3
Assuming 'OuterExit' is non-null
4
Taking true branch
1509 for (PHINode &P : OuterExit->phis()) {
1510 if (P.getNumIncomingValues() != 1)
5
Assuming the condition is false
6
Taking false branch
1511 continue;
1512 // Skip Phis with incoming values defined in the inner loop. Those should
1513 // already have been updated.
1514 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
7
Assuming the object is a 'CastReturnType'
1515 if (!I
7.1
'I' is non-null
|| LI->getLoopFor(I->getParent()) == InnerLoop)
8
Assuming the condition is false
9
Taking false branch
1516 continue;
1517
1518 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
10
Assuming the object is not a 'CastReturnType'
11
'NewPhi' initialized to a null pointer value
1519 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
12
Called C++ object pointer is null
1520 NewPhi->setIncomingBlock(0, OuterLatch);
1521 // We might have incoming edges from other BBs, i.e., the original outer
1522 // header.
1523 for (auto *Pred : predecessors(InnerLatch)) {
1524 if (Pred == OuterLatch)
1525 continue;
1526 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1527 }
1528 NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1529 P.setIncomingValue(0, NewPhi);
1530 }
1531 }
1532
1533 // Now adjust the incoming blocks for the LCSSA PHIs.
1534 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1535 // with the new latch.
1536 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1537}
1538
1539bool LoopInterchangeTransform::adjustLoopBranches() {
1540 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopBranches called\n"
; } } while (false)
;
1541 std::vector<DominatorTree::UpdateType> DTUpdates;
1542
1543 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1544 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1545
1546 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1548, __extension__
__PRETTY_FUNCTION__))
1547 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1548, __extension__
__PRETTY_FUNCTION__))
1548 InnerLoopPreHeader && "Guaranteed by loop-simplify form")(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1548, __extension__
__PRETTY_FUNCTION__))
;
1549 // Ensure that both preheaders do not contain PHI nodes and have single
1550 // predecessors. This allows us to move them easily. We use
1551 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1552 // preheaders do not satisfy those conditions.
1553 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1554 !OuterLoopPreHeader->getUniquePredecessor())
1555 OuterLoopPreHeader =
1556 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1557 if (InnerLoopPreHeader == OuterLoop->getHeader())
1558 InnerLoopPreHeader =
1559 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1560
1561 // Adjust the loop preheader
1562 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1563 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1564 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1565 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1566 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1567 BasicBlock *InnerLoopLatchPredecessor =
1568 InnerLoopLatch->getUniquePredecessor();
1569 BasicBlock *InnerLoopLatchSuccessor;
1570 BasicBlock *OuterLoopLatchSuccessor;
1571
1572 BranchInst *OuterLoopLatchBI =
1573 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1574 BranchInst *InnerLoopLatchBI =
1575 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1576 BranchInst *OuterLoopHeaderBI =
1577 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1578 BranchInst *InnerLoopHeaderBI =
1579 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1580
1581 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1582 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1583 !InnerLoopHeaderBI)
1584 return false;
1585
1586 BranchInst *InnerLoopLatchPredecessorBI =
1587 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1588 BranchInst *OuterLoopPredecessorBI =
1589 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1590
1591 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1592 return false;
1593 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1594 if (!InnerLoopHeaderSuccessor)
1595 return false;
1596
1597 // Adjust Loop Preheader and headers.
1598 // The branches in the outer loop predecessor and the outer loop header can
1599 // be unconditional branches or conditional branches with duplicates. Consider
1600 // this when updating the successors.
1601 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1602 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1603 // The outer loop header might or might not branch to the outer latch.
1604 // We are guaranteed to branch to the inner loop preheader.
1605 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1606 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1607 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1608 DTUpdates,
1609 /*MustUpdateOnce=*/false);
1610 }
1611 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1612 InnerLoopHeaderSuccessor, DTUpdates,
1613 /*MustUpdateOnce=*/false);
1614
1615 // Adjust reduction PHI's now that the incoming block has changed.
1616 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1617 OuterLoopHeader);
1618
1619 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1620 OuterLoopPreHeader, DTUpdates);
1621
1622 // -------------Adjust loop latches-----------
1623 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1624 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1625 else
1626 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1627
1628 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1629 InnerLoopLatchSuccessor, DTUpdates);
1630
1631 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1632 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1633 else
1634 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1635
1636 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1637 OuterLoopLatchSuccessor, DTUpdates);
1638 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1639 DTUpdates);
1640
1641 DT->applyUpdates(DTUpdates);
1642 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1643 OuterLoopPreHeader);
1644
1645 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1646 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1647 InnerLoop, LI);
1648 // For PHIs in the exit block of the outer loop, outer's latch has been
1649 // replaced by Inners'.
1650 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1651
1652 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1653 // Now update the reduction PHIs in the inner and outer loop headers.
1654 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1655 for (PHINode &PHI : InnerLoopHeader->phis())
1656 if (OuterInnerReductions.contains(&PHI))
1657 InnerLoopPHIs.push_back(&PHI);
1658
1659 for (PHINode &PHI : OuterLoopHeader->phis())
1660 if (OuterInnerReductions.contains(&PHI))
1661 OuterLoopPHIs.push_back(&PHI);
1662
1663 // Now move the remaining reduction PHIs from outer to inner loop header and
1664 // vice versa. The PHI nodes must be part of a reduction across the inner and
1665 // outer loop and all the remains to do is and updating the incoming blocks.
1666 for (PHINode *PHI : OuterLoopPHIs) {
1667 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Outer loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1668 PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1669 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1669, __extension__
__PRETTY_FUNCTION__))
;
1670 }
1671 for (PHINode *PHI : InnerLoopPHIs) {
1672 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1673 PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1674 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1674, __extension__
__PRETTY_FUNCTION__))
;
1675 }
1676
1677 // Update the incoming blocks for moved PHI nodes.
1678 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1679 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1680 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1681 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1682
1683 // Values defined in the outer loop header could be used in the inner loop
1684 // latch. In that case, we need to create LCSSA phis for them, because after
1685 // interchanging they will be defined in the new inner loop and used in the
1686 // new outer loop.
1687 IRBuilder<> Builder(OuterLoopHeader->getContext());
1688 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1689 for (Instruction &I :
1690 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1691 MayNeedLCSSAPhis.push_back(&I);
1692 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1693
1694 return true;
1695}
1696
1697bool LoopInterchangeTransform::adjustLoopLinks() {
1698 // Adjust all branches in the inner and outer loop.
1699 bool Changed = adjustLoopBranches();
1700 if (Changed) {
1701 // We have interchanged the preheaders so we need to interchange the data in
1702 // the preheaders as well. This is because the content of the inner
1703 // preheader was previously executed inside the outer loop.
1704 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1705 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1706 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1707 }
1708 return Changed;
1709}
1710
1711namespace {
1712/// Main LoopInterchange Pass.
1713struct LoopInterchangeLegacyPass : public LoopPass {
1714 static char ID;
1715
1716 LoopInterchangeLegacyPass() : LoopPass(ID) {
1717 initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
1718 }
1719
1720 void getAnalysisUsage(AnalysisUsage &AU) const override {
1721 AU.addRequired<DependenceAnalysisWrapperPass>();
1722 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1723
1724 getLoopAnalysisUsage(AU);
1725 }
1726
1727 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1728 if (skipLoop(L))
1729 return false;
1730
1731 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1732 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1733 auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1734 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1735 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1736 std::unique_ptr<CacheCost> CC = nullptr;
1737 return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L);
1738 }
1739};
1740} // namespace
1741
1742char LoopInterchangeLegacyPass::ID = 0;
1743
1744INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1745 "Interchanges loops for cache reuse", false, false)static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry
&Registry) {
1746INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
1747INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)initializeDependenceAnalysisWrapperPassPass(Registry);
1748INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)initializeOptimizationRemarkEmitterWrapperPassPass(Registry);
1749
1750INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1751 "Interchanges loops for cache reuse", false, false)PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse"
, "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag
; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag
, initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry
)); }
1752
1753Pass *llvm::createLoopInterchangePass() {
1754 return new LoopInterchangeLegacyPass();
1755}
1756
1757PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1758 LoopAnalysisManager &AM,
1759 LoopStandardAnalysisResults &AR,
1760 LPMUpdater &U) {
1761 Function &F = *LN.getParent();
1762
1763 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1764 std::unique_ptr<CacheCost> CC =
1765 CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
1766 OptimizationRemarkEmitter ORE(&F);
1767 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
1768 return PreservedAnalyses::all();
1769 return getLoopPassPreservedAnalyses();
1770}