Bug Summary

File:build/source/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Warning:line 635, column 8
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LoopInterchange.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/Scalar -I /build/source/llvm/lib/Transforms/Scalar -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Scalar/LoopInterchange.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Analysis/DependenceAnalysis.h"
21#include "llvm/Analysis/LoopCacheAnalysis.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/LoopNestAnalysis.h"
24#include "llvm/Analysis/LoopPass.h"
25#include "llvm/Analysis/OptimizationRemarkEmitter.h"
26#include "llvm/Analysis/ScalarEvolution.h"
27#include "llvm/Analysis/ScalarEvolutionExpressions.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/DiagnosticInfo.h"
31#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/IRBuilder.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/User.h"
38#include "llvm/IR/Value.h"
39#include "llvm/Support/Casting.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/ErrorHandling.h"
43#include "llvm/Support/raw_ostream.h"
44#include "llvm/Transforms/Scalar/LoopPassManager.h"
45#include "llvm/Transforms/Utils/BasicBlockUtils.h"
46#include "llvm/Transforms/Utils/LoopUtils.h"
47#include <cassert>
48#include <utility>
49#include <vector>
50
51using namespace llvm;
52
53#define DEBUG_TYPE"loop-interchange" "loop-interchange"
54
55STATISTIC(LoopsInterchanged, "Number of loops interchanged")static llvm::Statistic LoopsInterchanged = {"loop-interchange"
, "LoopsInterchanged", "Number of loops interchanged"}
;
56
57static cl::opt<int> LoopInterchangeCostThreshold(
58 "loop-interchange-threshold", cl::init(0), cl::Hidden,
59 cl::desc("Interchange if you gain more than this number"));
60
61namespace {
62
63using LoopVector = SmallVector<Loop *, 8>;
64
65// TODO: Check if we can use a sparse matrix here.
66using CharMatrix = std::vector<std::vector<char>>;
67
68} // end anonymous namespace
69
70// Maximum number of dependencies that can be handled in the dependency matrix.
71static const unsigned MaxMemInstrCount = 100;
72
73// Maximum loop depth supported.
74static const unsigned MaxLoopNestDepth = 10;
75
76#ifdef DUMP_DEP_MATRICIES
77static void printDepMatrix(CharMatrix &DepMatrix) {
78 for (auto &Row : DepMatrix) {
79 for (auto D : Row)
80 LLVM_DEBUG(dbgs() << D << " ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << D << " "; } } while
(false)
;
81 LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "\n"; } } while (false
)
;
82 }
83}
84#endif
85
86static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
87 Loop *L, DependenceInfo *DI,
88 ScalarEvolution *SE) {
89 using ValueVector = SmallVector<Value *, 16>;
90
91 ValueVector MemInstr;
92
93 // For each block.
94 for (BasicBlock *BB : L->blocks()) {
95 // Scan the BB and collect legal loads and stores.
96 for (Instruction &I : *BB) {
97 if (!isa<Instruction>(I))
98 return false;
99 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
100 if (!Ld->isSimple())
101 return false;
102 MemInstr.push_back(&I);
103 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
104 if (!St->isSimple())
105 return false;
106 MemInstr.push_back(&I);
107 }
108 }
109 }
110
111 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
112 << " Loads and Stores to analyze\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
;
113
114 ValueVector::iterator I, IE, J, JE;
115
116 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
117 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
118 std::vector<char> Dep;
119 Instruction *Src = cast<Instruction>(*I);
120 Instruction *Dst = cast<Instruction>(*J);
121 // Ignore Input dependencies.
122 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
123 continue;
124 // Track Output, Flow, and Anti dependencies.
125 if (auto D = DI->depends(Src, Dst, true)) {
126 assert(D->isOrdered() && "Expected an output, flow or anti dep.")(static_cast <bool> (D->isOrdered() && "Expected an output, flow or anti dep."
) ? void (0) : __assert_fail ("D->isOrdered() && \"Expected an output, flow or anti dep.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 126, __extension__
__PRETTY_FUNCTION__))
;
127 // If the direction vector is negative, normalize it to
128 // make it non-negative.
129 if (D->normalize(SE))
130 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Negative dependence vector normalized.\n"
; } } while (false)
;
131 LLVM_DEBUG(StringRef DepType =do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
132 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
133 dbgs() << "Found " << DepTypedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
134 << " dependency between Src and Dst\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
135 << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
;
136 unsigned Levels = D->getLevels();
137 char Direction;
138 for (unsigned II = 1; II <= Levels; ++II) {
139 if (D->isScalar(II)) {
140 Direction = 'S';
141 Dep.push_back(Direction);
142 } else {
143 unsigned Dir = D->getDirection(II);
144 if (Dir == Dependence::DVEntry::LT ||
145 Dir == Dependence::DVEntry::LE)
146 Direction = '<';
147 else if (Dir == Dependence::DVEntry::GT ||
148 Dir == Dependence::DVEntry::GE)
149 Direction = '>';
150 else if (Dir == Dependence::DVEntry::EQ)
151 Direction = '=';
152 else
153 Direction = '*';
154 Dep.push_back(Direction);
155 }
156 }
157 while (Dep.size() != Level) {
158 Dep.push_back('I');
159 }
160
161 DepMatrix.push_back(Dep);
162 if (DepMatrix.size() > MaxMemInstrCount) {
163 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
164 << " dependencies inside loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
;
165 return false;
166 }
167 }
168 }
169 }
170
171 return true;
172}
173
174// A loop is moved from index 'from' to an index 'to'. Update the Dependence
175// matrix by exchanging the two columns.
176static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
177 unsigned ToIndx) {
178 for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
179 std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
180}
181
182// After interchanging, check if the direction vector is valid.
183// [Theorem] A permutation of the loops in a perfect nest is legal if and only
184// if the direction matrix, after the same permutation is applied to its
185// columns, has no ">" direction as the leftmost non-"=" direction in any row.
186static bool isLexicographicallyPositive(std::vector<char> &DV) {
187 for (unsigned char Direction : DV) {
188 if (Direction == '<')
189 return true;
190 if (Direction == '>' || Direction == '*')
191 return false;
192 }
193 return true;
194}
195
196// Checks if it is legal to interchange 2 loops.
197static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
198 unsigned InnerLoopId,
199 unsigned OuterLoopId) {
200 unsigned NumRows = DepMatrix.size();
201 std::vector<char> Cur;
202 // For each row check if it is valid to interchange.
203 for (unsigned Row = 0; Row < NumRows; ++Row) {
204 // Create temporary DepVector check its lexicographical order
205 // before and after swapping OuterLoop vs InnerLoop
206 Cur = DepMatrix[Row];
207 if (!isLexicographicallyPositive(Cur))
208 return false;
209 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
210 if (!isLexicographicallyPositive(Cur))
211 return false;
212 }
213 return true;
214}
215
216static void populateWorklist(Loop &L, LoopVector &LoopList) {
217 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
218 << L.getHeader()->getParent()->getName() << " Loop: %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
219 << L.getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
;
220 assert(LoopList.empty() && "LoopList should initially be empty!")(static_cast <bool> (LoopList.empty() && "LoopList should initially be empty!"
) ? void (0) : __assert_fail ("LoopList.empty() && \"LoopList should initially be empty!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 220, __extension__
__PRETTY_FUNCTION__))
;
221 Loop *CurrentLoop = &L;
222 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
223 while (!Vec->empty()) {
224 // The current loop has multiple subloops in it hence it is not tightly
225 // nested.
226 // Discard all loops above it added into Worklist.
227 if (Vec->size() != 1) {
228 LoopList = {};
229 return;
230 }
231
232 LoopList.push_back(CurrentLoop);
233 CurrentLoop = Vec->front();
234 Vec = &CurrentLoop->getSubLoops();
235 }
236 LoopList.push_back(CurrentLoop);
237}
238
239namespace {
240
241/// LoopInterchangeLegality checks if it is legal to interchange the loop.
242class LoopInterchangeLegality {
243public:
244 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
245 OptimizationRemarkEmitter *ORE)
246 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
247
248 /// Check if the loops can be interchanged.
249 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
250 CharMatrix &DepMatrix);
251
252 /// Discover induction PHIs in the header of \p L. Induction
253 /// PHIs are added to \p Inductions.
254 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
255
256 /// Check if the loop structure is understood. We do not handle triangular
257 /// loops for now.
258 bool isLoopStructureUnderstood();
259
260 bool currentLimitations();
261
262 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
263 return OuterInnerReductions;
264 }
265
266 const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
267 return InnerLoopInductions;
268 }
269
270private:
271 bool tightlyNested(Loop *Outer, Loop *Inner);
272 bool containsUnsafeInstructions(BasicBlock *BB);
273
274 /// Discover induction and reduction PHIs in the header of \p L. Induction
275 /// PHIs are added to \p Inductions, reductions are added to
276 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
277 /// to be passed as \p InnerLoop.
278 bool findInductionAndReductions(Loop *L,
279 SmallVector<PHINode *, 8> &Inductions,
280 Loop *InnerLoop);
281
282 Loop *OuterLoop;
283 Loop *InnerLoop;
284
285 ScalarEvolution *SE;
286
287 /// Interface to emit optimization remarks.
288 OptimizationRemarkEmitter *ORE;
289
290 /// Set of reduction PHIs taking part of a reduction across the inner and
291 /// outer loop.
292 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
293
294 /// Set of inner loop induction PHIs
295 SmallVector<PHINode *, 8> InnerLoopInductions;
296};
297
298/// LoopInterchangeProfitability checks if it is profitable to interchange the
299/// loop.
300class LoopInterchangeProfitability {
301public:
302 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
303 OptimizationRemarkEmitter *ORE)
304 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
305
306 /// Check if the loop interchange is profitable.
307 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
308 unsigned InnerLoopId, unsigned OuterLoopId,
309 CharMatrix &DepMatrix,
310 const DenseMap<const Loop *, unsigned> &CostMap,
311 std::unique_ptr<CacheCost> &CC);
312
313private:
314 int getInstrOrderCost();
315 std::optional<bool> isProfitablePerLoopCacheAnalysis(
316 const DenseMap<const Loop *, unsigned> &CostMap,
317 std::unique_ptr<CacheCost> &CC);
318 std::optional<bool> isProfitablePerInstrOrderCost();
319 std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
320 unsigned OuterLoopId,
321 CharMatrix &DepMatrix);
322 Loop *OuterLoop;
323 Loop *InnerLoop;
324
325 /// Scev analysis.
326 ScalarEvolution *SE;
327
328 /// Interface to emit optimization remarks.
329 OptimizationRemarkEmitter *ORE;
330};
331
332/// LoopInterchangeTransform interchanges the loop.
333class LoopInterchangeTransform {
334public:
335 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
336 LoopInfo *LI, DominatorTree *DT,
337 const LoopInterchangeLegality &LIL)
338 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
339
340 /// Interchange OuterLoop and InnerLoop.
341 bool transform();
342 void restructureLoops(Loop *NewInner, Loop *NewOuter,
343 BasicBlock *OrigInnerPreHeader,
344 BasicBlock *OrigOuterPreHeader);
345 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
346
347private:
348 bool adjustLoopLinks();
349 bool adjustLoopBranches();
350
351 Loop *OuterLoop;
352 Loop *InnerLoop;
353
354 /// Scev analysis.
355 ScalarEvolution *SE;
356
357 LoopInfo *LI;
358 DominatorTree *DT;
359
360 const LoopInterchangeLegality &LIL;
361};
362
363struct LoopInterchange {
364 ScalarEvolution *SE = nullptr;
365 LoopInfo *LI = nullptr;
366 DependenceInfo *DI = nullptr;
367 DominatorTree *DT = nullptr;
368 std::unique_ptr<CacheCost> CC = nullptr;
369
370 /// Interface to emit optimization remarks.
371 OptimizationRemarkEmitter *ORE;
372
373 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
374 DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
375 OptimizationRemarkEmitter *ORE)
376 : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
377
378 bool run(Loop *L) {
379 if (L->getParentLoop())
380 return false;
381 SmallVector<Loop *, 8> LoopList;
382 populateWorklist(*L, LoopList);
383 return processLoopList(LoopList);
384 }
385
386 bool run(LoopNest &LN) {
387 SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
388 for (unsigned I = 1; I < LoopList.size(); ++I)
389 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
390 return false;
391 return processLoopList(LoopList);
392 }
393
394 bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
395 for (Loop *L : LoopList) {
396 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
397 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
398 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Couldn't compute backedge count\n"
; } } while (false)
;
399 return false;
400 }
401 if (L->getNumBackEdges() != 1) {
402 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "NumBackEdges is not equal to 1\n"
; } } while (false)
;
403 return false;
404 }
405 if (!L->getExitingBlock()) {
406 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't have unique exit block\n"
; } } while (false)
;
407 return false;
408 }
409 }
410 return true;
411 }
412
413 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
414 // TODO: Add a better heuristic to select the loop to be interchanged based
415 // on the dependence matrix. Currently we select the innermost loop.
416 return LoopList.size() - 1;
417 }
418
419 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
420 bool Changed = false;
421 unsigned LoopNestDepth = LoopList.size();
422 if (LoopNestDepth < 2) {
423 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't contain minimum nesting level.\n"
; } } while (false)
;
424 return false;
425 }
426 if (LoopNestDepth > MaxLoopNestDepth) {
427 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
428 << MaxLoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
;
429 return false;
430 }
431 if (!isComputableLoopNest(LoopList)) {
432 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not valid loop candidate for interchange\n"
; } } while (false)
;
433 return false;
434 }
435
436 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepthdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
437 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
;
438
439 CharMatrix DependencyMatrix;
440 Loop *OuterMostLoop = *(LoopList.begin());
441 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
442 OuterMostLoop, DI, SE)) {
443 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Populating dependency matrix failed\n"
; } } while (false)
;
444 return false;
445 }
446#ifdef DUMP_DEP_MATRICIES
447 LLVM_DEBUG(dbgs() << "Dependence before interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence before interchange\n"
; } } while (false)
;
448 printDepMatrix(DependencyMatrix);
449#endif
450
451 // Get the Outermost loop exit.
452 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
453 if (!LoopNestExit) {
454 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "OuterMostLoop needs an unique exit block"
; } } while (false)
;
455 return false;
456 }
457
458 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
459 // Obtain the loop vector returned from loop cache analysis beforehand,
460 // and put each <Loop, index> pair into a map for constant time query
461 // later. Indices in loop vector reprsent the optimal order of the
462 // corresponding loop, e.g., given a loopnest with depth N, index 0
463 // indicates the loop should be placed as the outermost loop and index N
464 // indicates the loop should be placed as the innermost loop.
465 //
466 // For the old pass manager CacheCost would be null.
467 DenseMap<const Loop *, unsigned> CostMap;
468 if (CC != nullptr) {
469 const auto &LoopCosts = CC->getLoopCosts();
470 for (unsigned i = 0; i < LoopCosts.size(); i++) {
471 CostMap[LoopCosts[i].first] = i;
472 }
473 }
474 // We try to achieve the globally optimal memory access for the loopnest,
475 // and do interchange based on a bubble-sort fasion. We start from
476 // the innermost loop, move it outwards to the best possible position
477 // and repeat this process.
478 for (unsigned j = SelecLoopId; j > 0; j--) {
479 bool ChangedPerIter = false;
480 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
481 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
482 DependencyMatrix, CostMap);
483 if (!Interchanged)
484 continue;
485 // Loops interchanged, update LoopList accordingly.
486 std::swap(LoopList[i - 1], LoopList[i]);
487 // Update the DependencyMatrix
488 interChangeDependencies(DependencyMatrix, i, i - 1);
489#ifdef DUMP_DEP_MATRICIES
490 LLVM_DEBUG(dbgs() << "Dependence after interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence after interchange\n"
; } } while (false)
;
491 printDepMatrix(DependencyMatrix);
492#endif
493 ChangedPerIter |= Interchanged;
494 Changed |= Interchanged;
495 }
496 // Early abort if there was no interchange during an entire round of
497 // moving loops outwards.
498 if (!ChangedPerIter)
499 break;
500 }
501 return Changed;
502 }
503
504 bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
505 unsigned OuterLoopId,
506 std::vector<std::vector<char>> &DependencyMatrix,
507 const DenseMap<const Loop *, unsigned> &CostMap) {
508 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
509 << " and OuterLoopId = " << OuterLoopId << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
;
510 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
511 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
512 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not interchanging loops. Cannot prove legality.\n"
; } } while (false)
;
513 return false;
514 }
515 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are legal to interchange\n"
; } } while (false)
;
516 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
517 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
518 DependencyMatrix, CostMap, CC)) {
519 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Interchanging loops not profitable.\n"
; } } while (false)
;
520 return false;
521 }
522
523 ORE->emit([&]() {
524 return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged",
525 InnerLoop->getStartLoc(),
526 InnerLoop->getHeader())
527 << "Loop interchanged with enclosing loop.";
528 });
529
530 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
531 LIT.transform();
532 LLVM_DEBUG(dbgs() << "Loops interchanged.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops interchanged.\n"
; } } while (false)
;
533 LoopsInterchanged++;
534
535 llvm::formLCSSARecursively(*OuterLoop, *DT, LI);
536 return true;
537 }
538};
539
540} // end anonymous namespace
541
542bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
543 return any_of(*BB, [](const Instruction &I) {
544 return I.mayHaveSideEffects() || I.mayReadFromMemory();
545 });
546}
547
548bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
549 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
550 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
551 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
552
553 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking if loops are tightly nested\n"
; } } while (false)
;
554
555 // A perfectly nested loop will not have any branch in between the outer and
556 // inner block i.e. outer header will branch to either inner preheader and
557 // outerloop latch.
558 BranchInst *OuterLoopHeaderBI =
559 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
560 if (!OuterLoopHeaderBI)
561 return false;
562
563 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
564 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
565 Succ != OuterLoopLatch)
566 return false;
567
568 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking instructions in Loop header and Loop latch\n"
; } } while (false)
;
569 // We do not have any basic block in between now make sure the outer header
570 // and outer loop latch doesn't contain any unsafe instructions.
571 if (containsUnsafeInstructions(OuterLoopHeader) ||
572 containsUnsafeInstructions(OuterLoopLatch))
573 return false;
574
575 // Also make sure the inner loop preheader does not contain any unsafe
576 // instructions. Note that all instructions in the preheader will be moved to
577 // the outer loop header when interchanging.
578 if (InnerLoopPreHeader != OuterLoopHeader &&
579 containsUnsafeInstructions(InnerLoopPreHeader))
580 return false;
581
582 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
583 // Ensure the inner loop exit block flows to the outer loop latch possibly
584 // through empty blocks.
585 const BasicBlock &SuccInner =
586 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
587 if (&SuccInner != OuterLoopLatch) {
588 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExitdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
589 << " does not lead to the outer loop latch.\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
;
590 return false;
591 }
592 // The inner loop exit block does flow to the outer loop latch and not some
593 // other BBs, now make sure it contains safe instructions, since it will be
594 // moved into the (new) inner loop after interchange.
595 if (containsUnsafeInstructions(InnerLoopExit))
596 return false;
597
598 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are perfectly nested\n"
; } } while (false)
;
599 // We have a perfect loop nest.
600 return true;
601}
602
603bool LoopInterchangeLegality::isLoopStructureUnderstood() {
604 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
605 for (PHINode *InnerInduction : InnerLoopInductions) {
10
Assuming '__begin1' is equal to '__end1'
606 unsigned Num = InnerInduction->getNumOperands();
607 for (unsigned i = 0; i < Num; ++i) {
608 Value *Val = InnerInduction->getOperand(i);
609 if (isa<Constant>(Val))
610 continue;
611 Instruction *I = dyn_cast<Instruction>(Val);
612 if (!I)
613 return false;
614 // TODO: Handle triangular loops.
615 // e.g. for(int i=0;i<N;i++)
616 // for(int j=i;j<N;j++)
617 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
618 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
619 InnerLoopPreheader &&
620 !OuterLoop->isLoopInvariant(I)) {
621 return false;
622 }
623 }
624 }
625
626 // TODO: Handle triangular loops of another form.
627 // e.g. for(int i=0;i<N;i++)
628 // for(int j=0;j<i;j++)
629 // or,
630 // for(int i=0;i<N;i++)
631 // for(int j=0;j*i<N;j++)
632 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
633 BranchInst *InnerLoopLatchBI =
12
'InnerLoopLatchBI' initialized to a null pointer value
634 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
11
Assuming the object is not a 'CastReturnType'
635 if (!InnerLoopLatchBI->isConditional())
13
Called C++ object pointer is null
636 return false;
637 if (CmpInst *InnerLoopCmp =
638 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
639 Value *Op0 = InnerLoopCmp->getOperand(0);
640 Value *Op1 = InnerLoopCmp->getOperand(1);
641
642 // LHS and RHS of the inner loop exit condition, e.g.,
643 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
644 Value *Left = nullptr;
645 Value *Right = nullptr;
646
647 // Check if V only involves inner loop induction variable.
648 // Return true if V is InnerInduction, or a cast from
649 // InnerInduction, or a binary operator that involves
650 // InnerInduction and a constant.
651 std::function<bool(Value *)> IsPathToInnerIndVar;
652 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
653 if (llvm::is_contained(InnerLoopInductions, V))
654 return true;
655 if (isa<Constant>(V))
656 return true;
657 const Instruction *I = dyn_cast<Instruction>(V);
658 if (!I)
659 return false;
660 if (isa<CastInst>(I))
661 return IsPathToInnerIndVar(I->getOperand(0));
662 if (isa<BinaryOperator>(I))
663 return IsPathToInnerIndVar(I->getOperand(0)) &&
664 IsPathToInnerIndVar(I->getOperand(1));
665 return false;
666 };
667
668 // In case of multiple inner loop indvars, it is okay if LHS and RHS
669 // are both inner indvar related variables.
670 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
671 return true;
672
673 // Otherwise we check if the cmp instruction compares an inner indvar
674 // related variable (Left) with a outer loop invariant (Right).
675 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
676 Left = Op0;
677 Right = Op1;
678 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
679 Left = Op1;
680 Right = Op0;
681 }
682
683 if (Left == nullptr)
684 return false;
685
686 const SCEV *S = SE->getSCEV(Right);
687 if (!SE->isLoopInvariant(S, OuterLoop))
688 return false;
689 }
690
691 return true;
692}
693
694// If SV is a LCSSA PHI node with a single incoming value, return the incoming
695// value.
696static Value *followLCSSA(Value *SV) {
697 PHINode *PHI = dyn_cast<PHINode>(SV);
698 if (!PHI)
699 return SV;
700
701 if (PHI->getNumIncomingValues() != 1)
702 return SV;
703 return followLCSSA(PHI->getIncomingValue(0));
704}
705
706// Check V's users to see if it is involved in a reduction in L.
707static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
708 // Reduction variables cannot be constants.
709 if (isa<Constant>(V))
710 return nullptr;
711
712 for (Value *User : V->users()) {
713 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
714 if (PHI->getNumIncomingValues() == 1)
715 continue;
716 RecurrenceDescriptor RD;
717 if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
718 // Detect floating point reduction only when it can be reordered.
719 if (RD.getExactFPMathInst() != nullptr)
720 return nullptr;
721 return PHI;
722 }
723 return nullptr;
724 }
725 }
726
727 return nullptr;
728}
729
730bool LoopInterchangeLegality::findInductionAndReductions(
731 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
732 if (!L->getLoopLatch() || !L->getLoopPredecessor())
733 return false;
734 for (PHINode &PHI : L->getHeader()->phis()) {
735 InductionDescriptor ID;
736 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
737 Inductions.push_back(&PHI);
738 else {
739 // PHIs in inner loops need to be part of a reduction in the outer loop,
740 // discovered when checking the PHIs of the outer loop earlier.
741 if (!InnerLoop) {
742 if (!OuterInnerReductions.count(&PHI)) {
743 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
744 "across the outer loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
;
745 return false;
746 }
747 } else {
748 assert(PHI.getNumIncomingValues() == 2 &&(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 749, __extension__
__PRETTY_FUNCTION__))
749 "Phis in loop header should have exactly 2 incoming values")(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 749, __extension__
__PRETTY_FUNCTION__))
;
750 // Check if we have a PHI node in the outer loop that has a reduction
751 // result from the inner loop as an incoming value.
752 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
753 PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
754 if (!InnerRedPhi ||
755 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
756 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
757 dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
758 << "Failed to recognize PHI as an induction or reduction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
;
759 return false;
760 }
761 OuterInnerReductions.insert(&PHI);
762 OuterInnerReductions.insert(InnerRedPhi);
763 }
764 }
765 }
766 return true;
767}
768
769// This function indicates the current limitations in the transform as a result
770// of which we do not proceed.
771bool LoopInterchangeLegality::currentLimitations() {
772 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
773
774 // transform currently expects the loop latches to also be the exiting
775 // blocks.
776 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
1
Assuming the condition is false
5
Taking false branch
777 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
2
Assuming the condition is false
778 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
3
Assuming the object is a 'class llvm::BranchInst &'
779 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
4
Assuming the object is a 'class llvm::BranchInst &'
780 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
781 dbgs() << "Loops where the latch is not the exiting block are not"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
782 << " supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
;
783 ORE->emit([&]() {
784 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ExitingNotLatch",
785 OuterLoop->getStartLoc(),
786 OuterLoop->getHeader())
787 << "Loops where the latch is not the exiting block cannot be"
788 " interchange currently.";
789 });
790 return true;
791 }
792
793 SmallVector<PHINode *, 8> Inductions;
794 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
6
Taking false branch
795 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
796 dbgs() << "Only outer loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
797 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
798 ORE->emit([&]() {
799 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter",
800 OuterLoop->getStartLoc(),
801 OuterLoop->getHeader())
802 << "Only outer loops with induction or reduction PHI nodes can be"
803 " interchanged currently.";
804 });
805 return true;
806 }
807
808 Inductions.clear();
809 // For multi-level loop nests, make sure that all phi nodes for inner loops
810 // at all levels can be recognized as a induction or reduction phi. Bail out
811 // if a phi node at a certain nesting level cannot be properly recognized.
812 Loop *CurLevelLoop = OuterLoop;
813 while (!CurLevelLoop->getSubLoops().empty()) {
7
Assuming the condition is false
8
Loop condition is false. Execution continues on line 832
814 // We already made sure that the loop nest is tightly nested.
815 CurLevelLoop = CurLevelLoop->getSubLoops().front();
816 if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
817 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
818 dbgs() << "Only inner loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
819 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
820 ORE->emit([&]() {
821 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner",
822 CurLevelLoop->getStartLoc(),
823 CurLevelLoop->getHeader())
824 << "Only inner loops with induction or reduction PHI nodes can be"
825 " interchange currently.";
826 });
827 return true;
828 }
829 }
830
831 // TODO: Triangular loops are not handled for now.
832 if (!isLoopStructureUnderstood()) {
9
Calling 'LoopInterchangeLegality::isLoopStructureUnderstood'
833 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop structure not understood by pass\n"
; } } while (false)
;
834 ORE->emit([&]() {
835 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner",
836 InnerLoop->getStartLoc(),
837 InnerLoop->getHeader())
838 << "Inner loop structure not understood currently.";
839 });
840 return true;
841 }
842
843 return false;
844}
845
846bool LoopInterchangeLegality::findInductions(
847 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
848 for (PHINode &PHI : L->getHeader()->phis()) {
849 InductionDescriptor ID;
850 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
851 Inductions.push_back(&PHI);
852 }
853 return !Inductions.empty();
854}
855
856// We currently only support LCSSA PHI nodes in the inner loop exit, if their
857// users are either reduction PHIs or PHIs outside the outer loop (which means
858// the we are only interested in the final value after the loop).
859static bool
860areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
861 SmallPtrSetImpl<PHINode *> &Reductions) {
862 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
863 for (PHINode &PHI : InnerExit->phis()) {
864 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
865 if (PHI.getNumIncomingValues() > 1)
866 return false;
867 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
868 PHINode *PN = dyn_cast<PHINode>(U);
869 return !PN ||
870 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
871 })) {
872 return false;
873 }
874 }
875 return true;
876}
877
878// We currently support LCSSA PHI nodes in the outer loop exit, if their
879// incoming values do not come from the outer loop latch or if the
880// outer loop latch has a single predecessor. In that case, the value will
881// be available if both the inner and outer loop conditions are true, which
882// will still be true after interchanging. If we have multiple predecessor,
883// that may not be the case, e.g. because the outer loop latch may be executed
884// if the inner loop is not executed.
885static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
886 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
887 for (PHINode &PHI : LoopNestExit->phis()) {
888 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
889 Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
890 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
891 continue;
892
893 // The incoming value is defined in the outer loop latch. Currently we
894 // only support that in case the outer loop latch has a single predecessor.
895 // This guarantees that the outer loop latch is executed if and only if
896 // the inner loop is executed (because tightlyNested() guarantees that the
897 // outer loop header only branches to the inner loop or the outer loop
898 // latch).
899 // FIXME: We could weaken this logic and allow multiple predecessors,
900 // if the values are produced outside the loop latch. We would need
901 // additional logic to update the PHI nodes in the exit block as
902 // well.
903 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
904 return false;
905 }
906 }
907 return true;
908}
909
910// In case of multi-level nested loops, it may occur that lcssa phis exist in
911// the latch of InnerLoop, i.e., when defs of the incoming values are further
912// inside the loopnest. Sometimes those incoming values are not available
913// after interchange, since the original inner latch will become the new outer
914// latch which may have predecessor paths that do not include those incoming
915// values.
916// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
917// multi-level loop nests.
918static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
919 if (InnerLoop->getSubLoops().empty())
920 return true;
921 // If the original outer latch has only one predecessor, then values defined
922 // further inside the looploop, e.g., in the innermost loop, will be available
923 // at the new outer latch after interchange.
924 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
925 return true;
926
927 // The outer latch has more than one predecessors, i.e., the inner
928 // exit and the inner header.
929 // PHI nodes in the inner latch are lcssa phis where the incoming values
930 // are defined further inside the loopnest. Check if those phis are used
931 // in the original inner latch. If that is the case then bail out since
932 // those incoming values may not be available at the new outer latch.
933 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
934 for (PHINode &PHI : InnerLoopLatch->phis()) {
935 for (auto *U : PHI.users()) {
936 Instruction *UI = cast<Instruction>(U);
937 if (InnerLoopLatch == UI->getParent())
938 return false;
939 }
940 }
941 return true;
942}
943
944bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
945 unsigned OuterLoopId,
946 CharMatrix &DepMatrix) {
947 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
948 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
949 << " and OuterLoopId = " << OuterLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
950 << " due to dependence\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
;
951 ORE->emit([&]() {
952 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence",
953 InnerLoop->getStartLoc(),
954 InnerLoop->getHeader())
955 << "Cannot interchange loops due to dependences.";
956 });
957 return false;
958 }
959 // Check if outer and inner loop contain legal instructions only.
960 for (auto *BB : OuterLoop->blocks())
961 for (Instruction &I : BB->instructionsWithoutDebug())
962 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
963 // readnone functions do not prevent interchanging.
964 if (CI->onlyWritesMemory())
965 continue;
966 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
967 dbgs() << "Loops with call instructions cannot be interchanged "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
968 << "safely.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
;
969 ORE->emit([&]() {
970 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "CallInst",
971 CI->getDebugLoc(),
972 CI->getParent())
973 << "Cannot interchange loops due to call instruction.";
974 });
975
976 return false;
977 }
978
979 if (!findInductions(InnerLoop, InnerLoopInductions)) {
980 LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cound not find inner loop induction variables.\n"
; } } while (false)
;
981 return false;
982 }
983
984 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
985 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"
; } } while (false)
;
986 ORE->emit([&]() {
987 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedInnerLatchPHI",
988 InnerLoop->getStartLoc(),
989 InnerLoop->getHeader())
990 << "Cannot interchange loops because unsupported PHI nodes found "
991 "in inner loop latch.";
992 });
993 return false;
994 }
995
996 // TODO: The loops could not be interchanged due to current limitations in the
997 // transform module.
998 if (currentLimitations()) {
999 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not legal because of current transform limitation\n"
; } } while (false)
;
1000 return false;
1001 }
1002
1003 // Check if the loops are tightly nested.
1004 if (!tightlyNested(OuterLoop, InnerLoop)) {
1005 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops not tightly nested\n"
; } } while (false)
;
1006 ORE->emit([&]() {
1007 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested",
1008 InnerLoop->getStartLoc(),
1009 InnerLoop->getHeader())
1010 << "Cannot interchange loops because they are not tightly "
1011 "nested.";
1012 });
1013 return false;
1014 }
1015
1016 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1017 OuterInnerReductions)) {
1018 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"
; } } while (false)
;
1019 ORE->emit([&]() {
1020 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1021 InnerLoop->getStartLoc(),
1022 InnerLoop->getHeader())
1023 << "Found unsupported PHI node in loop exit.";
1024 });
1025 return false;
1026 }
1027
1028 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1029 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"
; } } while (false)
;
1030 ORE->emit([&]() {
1031 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1032 OuterLoop->getStartLoc(),
1033 OuterLoop->getHeader())
1034 << "Found unsupported PHI node in loop exit.";
1035 });
1036 return false;
1037 }
1038
1039 return true;
1040}
1041
1042int LoopInterchangeProfitability::getInstrOrderCost() {
1043 unsigned GoodOrder, BadOrder;
1044 BadOrder = GoodOrder = 0;
1045 for (BasicBlock *BB : InnerLoop->blocks()) {
1046 for (Instruction &Ins : *BB) {
1047 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1048 unsigned NumOp = GEP->getNumOperands();
1049 bool FoundInnerInduction = false;
1050 bool FoundOuterInduction = false;
1051 for (unsigned i = 0; i < NumOp; ++i) {
1052 // Skip operands that are not SCEV-able.
1053 if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1054 continue;
1055
1056 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1057 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1058 if (!AR)
1059 continue;
1060
1061 // If we find the inner induction after an outer induction e.g.
1062 // for(int i=0;i<N;i++)
1063 // for(int j=0;j<N;j++)
1064 // A[i][j] = A[i-1][j-1]+k;
1065 // then it is a good order.
1066 if (AR->getLoop() == InnerLoop) {
1067 // We found an InnerLoop induction after OuterLoop induction. It is
1068 // a good order.
1069 FoundInnerInduction = true;
1070 if (FoundOuterInduction) {
1071 GoodOrder++;
1072 break;
1073 }
1074 }
1075 // If we find the outer induction after an inner induction e.g.
1076 // for(int i=0;i<N;i++)
1077 // for(int j=0;j<N;j++)
1078 // A[j][i] = A[j-1][i-1]+k;
1079 // then it is a bad order.
1080 if (AR->getLoop() == OuterLoop) {
1081 // We found an OuterLoop induction after InnerLoop induction. It is
1082 // a bad order.
1083 FoundOuterInduction = true;
1084 if (FoundInnerInduction) {
1085 BadOrder++;
1086 break;
1087 }
1088 }
1089 }
1090 }
1091 }
1092 }
1093 return GoodOrder - BadOrder;
1094}
1095
1096std::optional<bool>
1097LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1098 const DenseMap<const Loop *, unsigned> &CostMap,
1099 std::unique_ptr<CacheCost> &CC) {
1100 // This is the new cost model returned from loop cache analysis.
1101 // A smaller index means the loop should be placed an outer loop, and vice
1102 // versa.
1103 if (CostMap.contains(InnerLoop) && CostMap.contains(OuterLoop)) {
1104 unsigned InnerIndex = 0, OuterIndex = 0;
1105 InnerIndex = CostMap.find(InnerLoop)->second;
1106 OuterIndex = CostMap.find(OuterLoop)->second;
1107 LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndexdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "InnerIndex = " <<
InnerIndex << ", OuterIndex = " << OuterIndex <<
"\n"; } } while (false)
1108 << ", OuterIndex = " << OuterIndex << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "InnerIndex = " <<
InnerIndex << ", OuterIndex = " << OuterIndex <<
"\n"; } } while (false)
;
1109 if (InnerIndex < OuterIndex)
1110 return std::optional<bool>(true);
1111 assert(InnerIndex != OuterIndex && "CostMap should assign unique "(static_cast <bool> (InnerIndex != OuterIndex &&
"CostMap should assign unique " "numbers to each loop") ? void
(0) : __assert_fail ("InnerIndex != OuterIndex && \"CostMap should assign unique \" \"numbers to each loop\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1112, __extension__
__PRETTY_FUNCTION__))
1112 "numbers to each loop")(static_cast <bool> (InnerIndex != OuterIndex &&
"CostMap should assign unique " "numbers to each loop") ? void
(0) : __assert_fail ("InnerIndex != OuterIndex && \"CostMap should assign unique \" \"numbers to each loop\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1112, __extension__
__PRETTY_FUNCTION__))
;
1113 if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop))
1114 return std::nullopt;
1115 return std::optional<bool>(false);
1116 }
1117 return std::nullopt;
1118}
1119
1120std::optional<bool>
1121LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1122 // Legacy cost model: this is rough cost estimation algorithm. It counts the
1123 // good and bad order of induction variables in the instruction and allows
1124 // reordering if number of bad orders is more than good.
1125 int Cost = getInstrOrderCost();
1126 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cost = " << Cost
<< "\n"; } } while (false)
;
1127 if (Cost < 0 && Cost < LoopInterchangeCostThreshold)
1128 return std::optional<bool>(true);
1129
1130 return std::nullopt;
1131}
1132
1133std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1134 unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1135 for (auto &Row : DepMatrix) {
1136 // If the inner loop is loop independent or doesn't carry any dependency
1137 // it is not profitable to move this to outer position, since we are
1138 // likely able to do inner loop vectorization already.
1139 if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=')
1140 return std::optional<bool>(false);
1141
1142 // If the outer loop is not loop independent it is not profitable to move
1143 // this to inner position, since doing so would not enable inner loop
1144 // parallelism.
1145 if (Row[OuterLoopId] != 'I' && Row[OuterLoopId] != '=')
1146 return std::optional<bool>(false);
1147 }
1148 // If inner loop has dependence and outer loop is loop independent then it
1149 // is/ profitable to interchange to enable inner loop parallelism.
1150 // If there are no dependences, interchanging will not improve anything.
1151 return std::optional<bool>(!DepMatrix.empty());
1152}
1153
1154bool LoopInterchangeProfitability::isProfitable(
1155 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1156 unsigned OuterLoopId, CharMatrix &DepMatrix,
1157 const DenseMap<const Loop *, unsigned> &CostMap,
1158 std::unique_ptr<CacheCost> &CC) {
1159 // isProfitable() is structured to avoid endless loop interchange.
1160 // If loop cache analysis could decide the profitability then,
1161 // profitability check will stop and return the analysis result.
1162 // If cache analysis failed to analyze the loopnest (e.g.,
1163 // due to delinearization issues) then only check whether it is
1164 // profitable for InstrOrderCost. Likewise, if InstrOrderCost failed to
1165 // analysis the profitability then only, isProfitableForVectorization
1166 // will decide.
1167 std::optional<bool> shouldInterchange =
1168 isProfitablePerLoopCacheAnalysis(CostMap, CC);
1169 if (!shouldInterchange.has_value()) {
1170 shouldInterchange = isProfitablePerInstrOrderCost();
1171 if (!shouldInterchange.has_value())
1172 shouldInterchange =
1173 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1174 }
1175 if (!shouldInterchange.has_value()) {
1176 ORE->emit([&]() {
1177 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable",
1178 InnerLoop->getStartLoc(),
1179 InnerLoop->getHeader())
1180 << "Insufficient information to calculate the cost of loop for "
1181 "interchange.";
1182 });
1183 return false;
1184 } else if (!shouldInterchange.value()) {
1185 ORE->emit([&]() {
1186 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable",
1187 InnerLoop->getStartLoc(),
1188 InnerLoop->getHeader())
1189 << "Interchanging loops is not considered to improve cache "
1190 "locality nor vectorization.";
1191 });
1192 return false;
1193 }
1194 return true;
1195}
1196
1197void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1198 Loop *InnerLoop) {
1199 for (Loop *L : *OuterLoop)
1200 if (L == InnerLoop) {
1201 OuterLoop->removeChildLoop(L);
1202 return;
1203 }
1204 llvm_unreachable("Couldn't find loop")::llvm::llvm_unreachable_internal("Couldn't find loop", "llvm/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1204)
;
1205}
1206
1207/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1208/// new inner and outer loop after interchanging: NewInner is the original
1209/// outer loop and NewOuter is the original inner loop.
1210///
1211/// Before interchanging, we have the following structure
1212/// Outer preheader
1213// Outer header
1214// Inner preheader
1215// Inner header
1216// Inner body
1217// Inner latch
1218// outer bbs
1219// Outer latch
1220//
1221// After interchanging:
1222// Inner preheader
1223// Inner header
1224// Outer preheader
1225// Outer header
1226// Inner body
1227// outer bbs
1228// Outer latch
1229// Inner latch
1230void LoopInterchangeTransform::restructureLoops(
1231 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1232 BasicBlock *OrigOuterPreHeader) {
1233 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1234 // The original inner loop preheader moves from the new inner loop to
1235 // the parent loop, if there is one.
1236 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1237 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1238
1239 // Switch the loop levels.
1240 if (OuterLoopParent) {
1241 // Remove the loop from its parent loop.
1242 removeChildLoop(OuterLoopParent, NewInner);
1243 removeChildLoop(NewInner, NewOuter);
1244 OuterLoopParent->addChildLoop(NewOuter);
1245 } else {
1246 removeChildLoop(NewInner, NewOuter);
1247 LI->changeTopLevelLoop(NewInner, NewOuter);
1248 }
1249 while (!NewOuter->isInnermost())
1250 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1251 NewOuter->addChildLoop(NewInner);
1252
1253 // BBs from the original inner loop.
1254 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1255
1256 // Add BBs from the original outer loop to the original inner loop (excluding
1257 // BBs already in inner loop)
1258 for (BasicBlock *BB : NewInner->blocks())
1259 if (LI->getLoopFor(BB) == NewInner)
1260 NewOuter->addBlockEntry(BB);
1261
1262 // Now remove inner loop header and latch from the new inner loop and move
1263 // other BBs (the loop body) to the new inner loop.
1264 BasicBlock *OuterHeader = NewOuter->getHeader();
1265 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1266 for (BasicBlock *BB : OrigInnerBBs) {
1267 // Nothing will change for BBs in child loops.
1268 if (LI->getLoopFor(BB) != NewOuter)
1269 continue;
1270 // Remove the new outer loop header and latch from the new inner loop.
1271 if (BB == OuterHeader || BB == OuterLatch)
1272 NewInner->removeBlockFromLoop(BB);
1273 else
1274 LI->changeLoopFor(BB, NewInner);
1275 }
1276
1277 // The preheader of the original outer loop becomes part of the new
1278 // outer loop.
1279 NewOuter->addBlockEntry(OrigOuterPreHeader);
1280 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1281
1282 // Tell SE that we move the loops around.
1283 SE->forgetLoop(NewOuter);
1284}
1285
1286bool LoopInterchangeTransform::transform() {
1287 bool Transformed = false;
1288
1289 if (InnerLoop->getSubLoops().empty()) {
1290 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1291 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Splitting the inner loop latch\n"
; } } while (false)
;
1292 auto &InductionPHIs = LIL.getInnerLoopInductions();
1293 if (InductionPHIs.empty()) {
1294 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to find the point to split loop latch \n"
; } } while (false)
;
1295 return false;
1296 }
1297
1298 SmallVector<Instruction *, 8> InnerIndexVarList;
1299 for (PHINode *CurInductionPHI : InductionPHIs) {
1300 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1301 InnerIndexVarList.push_back(
1302 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1303 else
1304 InnerIndexVarList.push_back(
1305 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1306 }
1307
1308 // Create a new latch block for the inner loop. We split at the
1309 // current latch's terminator and then move the condition and all
1310 // operands that are not either loop-invariant or the induction PHI into the
1311 // new latch block.
1312 BasicBlock *NewLatch =
1313 SplitBlock(InnerLoop->getLoopLatch(),
1314 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1315
1316 SmallSetVector<Instruction *, 4> WorkList;
1317 unsigned i = 0;
1318 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1319 for (; i < WorkList.size(); i++) {
1320 // Duplicate instruction and move it the new latch. Update uses that
1321 // have been moved.
1322 Instruction *NewI = WorkList[i]->clone();
1323 NewI->insertBefore(NewLatch->getFirstNonPHI());
1324 assert(!NewI->mayHaveSideEffects() &&(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1326, __extension__
__PRETTY_FUNCTION__))
1325 "Moving instructions with side-effects may change behavior of "(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1326, __extension__
__PRETTY_FUNCTION__))
1326 "the loop nest!")(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1326, __extension__
__PRETTY_FUNCTION__))
;
1327 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1328 Instruction *UserI = cast<Instruction>(U.getUser());
1329 if (!InnerLoop->contains(UserI->getParent()) ||
1330 UserI->getParent() == NewLatch ||
1331 llvm::is_contained(InductionPHIs, UserI))
1332 U.set(NewI);
1333 }
1334 // Add operands of moved instruction to the worklist, except if they are
1335 // outside the inner loop or are the induction PHI.
1336 for (Value *Op : WorkList[i]->operands()) {
1337 Instruction *OpI = dyn_cast<Instruction>(Op);
1338 if (!OpI ||
1339 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1340 llvm::is_contained(InductionPHIs, OpI))
1341 continue;
1342 WorkList.insert(OpI);
1343 }
1344 }
1345 };
1346
1347 // FIXME: Should we interchange when we have a constant condition?
1348 Instruction *CondI = dyn_cast<Instruction>(
1349 cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1350 ->getCondition());
1351 if (CondI)
1352 WorkList.insert(CondI);
1353 MoveInstructions();
1354 for (Instruction *InnerIndexVar : InnerIndexVarList)
1355 WorkList.insert(cast<Instruction>(InnerIndexVar));
1356 MoveInstructions();
1357 }
1358
1359 // Ensure the inner loop phi nodes have a separate basic block.
1360 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1361 if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) {
1362 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1363 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "splitting InnerLoopHeader done\n"
; } } while (false)
;
1364 }
1365
1366 // Instructions in the original inner loop preheader may depend on values
1367 // defined in the outer loop header. Move them there, because the original
1368 // inner loop preheader will become the entry into the interchanged loop nest.
1369 // Currently we move all instructions and rely on LICM to move invariant
1370 // instructions outside the loop nest.
1371 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1372 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1373 if (InnerLoopPreHeader != OuterLoopHeader) {
1374 SmallPtrSet<Instruction *, 4> NeedsMoving;
1375 for (Instruction &I :
1376 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1377 std::prev(InnerLoopPreHeader->end()))))
1378 I.moveBefore(OuterLoopHeader->getTerminator());
1379 }
1380
1381 Transformed |= adjustLoopLinks();
1382 if (!Transformed) {
1383 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopLinks failed\n"
; } } while (false)
;
1384 return false;
1385 }
1386
1387 return true;
1388}
1389
1390/// \brief Move all instructions except the terminator from FromBB right before
1391/// InsertBefore
1392static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1393 BasicBlock *ToBB = InsertBefore->getParent();
1394
1395 ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(),
1396 FromBB->getTerminator()->getIterator());
1397}
1398
1399/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1400static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1401 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1402 // from BB1 afterwards.
1403 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1404 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1405 for (Instruction *I : TempInstrs)
1406 I->removeFromParent();
1407
1408 // Move instructions from BB2 to BB1.
1409 moveBBContents(BB2, BB1->getTerminator());
1410
1411 // Move instructions from TempInstrs to BB2.
1412 for (Instruction *I : TempInstrs)
1413 I->insertBefore(BB2->getTerminator());
1414}
1415
1416// Update BI to jump to NewBB instead of OldBB. Records updates to the
1417// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1418// \p OldBB is exactly once in BI's successor list.
1419static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1420 BasicBlock *NewBB,
1421 std::vector<DominatorTree::UpdateType> &DTUpdates,
1422 bool MustUpdateOnce = true) {
1423 assert((!MustUpdateOnce ||(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1427, __extension__
__PRETTY_FUNCTION__))
1424 llvm::count_if(successors(BI),(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1427, __extension__
__PRETTY_FUNCTION__))
1425 [OldBB](BasicBlock *BB) {(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1427, __extension__
__PRETTY_FUNCTION__))
1426 return BB == OldBB;(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1427, __extension__
__PRETTY_FUNCTION__))
1427 }) == 1) && "BI must jump to OldBB exactly once.")(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1427, __extension__
__PRETTY_FUNCTION__))
;
1428 bool Changed = false;
1429 for (Use &Op : BI->operands())
1430 if (Op == OldBB) {
1431 Op.set(NewBB);
1432 Changed = true;
1433 }
1434
1435 if (Changed) {
1436 DTUpdates.push_back(
1437 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1438 DTUpdates.push_back(
1439 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1440 }
1441 assert(Changed && "Expected a successor to be updated")(static_cast <bool> (Changed && "Expected a successor to be updated"
) ? void (0) : __assert_fail ("Changed && \"Expected a successor to be updated\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1441, __extension__
__PRETTY_FUNCTION__))
;
1442}
1443
1444// Move Lcssa PHIs to the right place.
1445static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1446 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1447 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1448 Loop *InnerLoop, LoopInfo *LI) {
1449
1450 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1451 // defined either in the header or latch. Those blocks will become header and
1452 // latch of the new outer loop, and the only possible users can PHI nodes
1453 // in the exit block of the loop nest or the outer loop header (reduction
1454 // PHIs, in that case, the incoming value must be defined in the inner loop
1455 // header). We can just substitute the user with the incoming value and remove
1456 // the PHI.
1457 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1458 assert(P.getNumIncomingValues() == 1 &&(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1459, __extension__
__PRETTY_FUNCTION__))
1459 "Only loops with a single exit are supported!")(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1459, __extension__
__PRETTY_FUNCTION__))
;
1460
1461 // Incoming values are guaranteed be instructions currently.
1462 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1463 // In case of multi-level nested loops, follow LCSSA to find the incoming
1464 // value defined from the innermost loop.
1465 auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1466 // Skip phis with incoming values from the inner loop body, excluding the
1467 // header and latch.
1468 if (IncIInnerMost->getParent() != InnerLatch &&
1469 IncIInnerMost->getParent() != InnerHeader)
1470 continue;
1471
1472 assert(all_of(P.users(),(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1480, __extension__
__PRETTY_FUNCTION__))
1473 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1480, __extension__
__PRETTY_FUNCTION__))
1474 return (cast<PHINode>(U)->getParent() == OuterHeader &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1480, __extension__
__PRETTY_FUNCTION__))
1475 IncI->getParent() == InnerHeader) ||(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1480, __extension__
__PRETTY_FUNCTION__))
1476 cast<PHINode>(U)->getParent() == OuterExit;(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1480, __extension__
__PRETTY_FUNCTION__))
1477 }) &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1480, __extension__
__PRETTY_FUNCTION__))
1478 "Can only replace phis iff the uses are in the loop nest exit or "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1480, __extension__
__PRETTY_FUNCTION__))
1479 "the incoming value is defined in the inner header (it will "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1480, __extension__
__PRETTY_FUNCTION__))
1480 "dominate all loop blocks after interchanging)")(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1480, __extension__
__PRETTY_FUNCTION__))
;
1481 P.replaceAllUsesWith(IncI);
1482 P.eraseFromParent();
1483 }
1484
1485 SmallVector<PHINode *, 8> LcssaInnerExit;
1486 for (PHINode &P : InnerExit->phis())
1487 LcssaInnerExit.push_back(&P);
1488
1489 SmallVector<PHINode *, 8> LcssaInnerLatch;
1490 for (PHINode &P : InnerLatch->phis())
1491 LcssaInnerLatch.push_back(&P);
1492
1493 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1494 // If a PHI node has users outside of InnerExit, it has a use outside the
1495 // interchanged loop and we have to preserve it. We move these to
1496 // InnerLatch, which will become the new exit block for the innermost
1497 // loop after interchanging.
1498 for (PHINode *P : LcssaInnerExit)
1499 P->moveBefore(InnerLatch->getFirstNonPHI());
1500
1501 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1502 // and we have to move them to the new inner latch.
1503 for (PHINode *P : LcssaInnerLatch)
1504 P->moveBefore(InnerExit->getFirstNonPHI());
1505
1506 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1507 // incoming values defined in the outer loop, we have to add a new PHI
1508 // in the inner loop latch, which became the exit block of the outer loop,
1509 // after interchanging.
1510 if (OuterExit) {
1511 for (PHINode &P : OuterExit->phis()) {
1512 if (P.getNumIncomingValues() != 1)
1513 continue;
1514 // Skip Phis with incoming values defined in the inner loop. Those should
1515 // already have been updated.
1516 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1517 if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1518 continue;
1519
1520 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1521 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1522 NewPhi->setIncomingBlock(0, OuterLatch);
1523 // We might have incoming edges from other BBs, i.e., the original outer
1524 // header.
1525 for (auto *Pred : predecessors(InnerLatch)) {
1526 if (Pred == OuterLatch)
1527 continue;
1528 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1529 }
1530 NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1531 P.setIncomingValue(0, NewPhi);
1532 }
1533 }
1534
1535 // Now adjust the incoming blocks for the LCSSA PHIs.
1536 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1537 // with the new latch.
1538 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1539}
1540
1541bool LoopInterchangeTransform::adjustLoopBranches() {
1542 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopBranches called\n"
; } } while (false)
;
1543 std::vector<DominatorTree::UpdateType> DTUpdates;
1544
1545 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1546 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1547
1548 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1550, __extension__
__PRETTY_FUNCTION__))
1549 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1550, __extension__
__PRETTY_FUNCTION__))
1550 InnerLoopPreHeader && "Guaranteed by loop-simplify form")(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1550, __extension__
__PRETTY_FUNCTION__))
;
1551 // Ensure that both preheaders do not contain PHI nodes and have single
1552 // predecessors. This allows us to move them easily. We use
1553 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1554 // preheaders do not satisfy those conditions.
1555 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1556 !OuterLoopPreHeader->getUniquePredecessor())
1557 OuterLoopPreHeader =
1558 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1559 if (InnerLoopPreHeader == OuterLoop->getHeader())
1560 InnerLoopPreHeader =
1561 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1562
1563 // Adjust the loop preheader
1564 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1565 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1566 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1567 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1568 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1569 BasicBlock *InnerLoopLatchPredecessor =
1570 InnerLoopLatch->getUniquePredecessor();
1571 BasicBlock *InnerLoopLatchSuccessor;
1572 BasicBlock *OuterLoopLatchSuccessor;
1573
1574 BranchInst *OuterLoopLatchBI =
1575 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1576 BranchInst *InnerLoopLatchBI =
1577 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1578 BranchInst *OuterLoopHeaderBI =
1579 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1580 BranchInst *InnerLoopHeaderBI =
1581 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1582
1583 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1584 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1585 !InnerLoopHeaderBI)
1586 return false;
1587
1588 BranchInst *InnerLoopLatchPredecessorBI =
1589 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1590 BranchInst *OuterLoopPredecessorBI =
1591 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1592
1593 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1594 return false;
1595 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1596 if (!InnerLoopHeaderSuccessor)
1597 return false;
1598
1599 // Adjust Loop Preheader and headers.
1600 // The branches in the outer loop predecessor and the outer loop header can
1601 // be unconditional branches or conditional branches with duplicates. Consider
1602 // this when updating the successors.
1603 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1604 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1605 // The outer loop header might or might not branch to the outer latch.
1606 // We are guaranteed to branch to the inner loop preheader.
1607 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1608 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1609 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1610 DTUpdates,
1611 /*MustUpdateOnce=*/false);
1612 }
1613 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1614 InnerLoopHeaderSuccessor, DTUpdates,
1615 /*MustUpdateOnce=*/false);
1616
1617 // Adjust reduction PHI's now that the incoming block has changed.
1618 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1619 OuterLoopHeader);
1620
1621 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1622 OuterLoopPreHeader, DTUpdates);
1623
1624 // -------------Adjust loop latches-----------
1625 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1626 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1627 else
1628 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1629
1630 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1631 InnerLoopLatchSuccessor, DTUpdates);
1632
1633 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1634 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1635 else
1636 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1637
1638 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1639 OuterLoopLatchSuccessor, DTUpdates);
1640 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1641 DTUpdates);
1642
1643 DT->applyUpdates(DTUpdates);
1644 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1645 OuterLoopPreHeader);
1646
1647 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1648 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1649 InnerLoop, LI);
1650 // For PHIs in the exit block of the outer loop, outer's latch has been
1651 // replaced by Inners'.
1652 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1653
1654 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1655 // Now update the reduction PHIs in the inner and outer loop headers.
1656 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1657 for (PHINode &PHI : InnerLoopHeader->phis())
1658 if (OuterInnerReductions.contains(&PHI))
1659 InnerLoopPHIs.push_back(&PHI);
1660
1661 for (PHINode &PHI : OuterLoopHeader->phis())
1662 if (OuterInnerReductions.contains(&PHI))
1663 OuterLoopPHIs.push_back(&PHI);
1664
1665 // Now move the remaining reduction PHIs from outer to inner loop header and
1666 // vice versa. The PHI nodes must be part of a reduction across the inner and
1667 // outer loop and all the remains to do is and updating the incoming blocks.
1668 for (PHINode *PHI : OuterLoopPHIs) {
1669 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Outer loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1670 PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1671 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1671, __extension__
__PRETTY_FUNCTION__))
;
1672 }
1673 for (PHINode *PHI : InnerLoopPHIs) {
1674 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1675 PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1676 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1676, __extension__
__PRETTY_FUNCTION__))
;
1677 }
1678
1679 // Update the incoming blocks for moved PHI nodes.
1680 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1681 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1682 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1683 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1684
1685 // Values defined in the outer loop header could be used in the inner loop
1686 // latch. In that case, we need to create LCSSA phis for them, because after
1687 // interchanging they will be defined in the new inner loop and used in the
1688 // new outer loop.
1689 IRBuilder<> Builder(OuterLoopHeader->getContext());
1690 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1691 for (Instruction &I :
1692 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1693 MayNeedLCSSAPhis.push_back(&I);
1694 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, Builder);
1695
1696 return true;
1697}
1698
1699bool LoopInterchangeTransform::adjustLoopLinks() {
1700 // Adjust all branches in the inner and outer loop.
1701 bool Changed = adjustLoopBranches();
1702 if (Changed) {
1703 // We have interchanged the preheaders so we need to interchange the data in
1704 // the preheaders as well. This is because the content of the inner
1705 // preheader was previously executed inside the outer loop.
1706 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1707 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1708 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1709 }
1710 return Changed;
1711}
1712
1713PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1714 LoopAnalysisManager &AM,
1715 LoopStandardAnalysisResults &AR,
1716 LPMUpdater &U) {
1717 Function &F = *LN.getParent();
1718
1719 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1720 std::unique_ptr<CacheCost> CC =
1721 CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
1722 OptimizationRemarkEmitter ORE(&F);
1723 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
1724 return PreservedAnalyses::all();
1725 U.markLoopNestChanged(true);
1726 return getLoopPassPreservedAnalyses();
1727}