Bug Summary

File:build/source/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Warning:line 1527, column 7
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LoopInterchange.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm -resource-dir /usr/lib/llvm-17/lib/clang/17 -I lib/Transforms/Scalar -I /build/source/llvm/lib/Transforms/Scalar -I include -I /build/source/llvm/include -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm=build-llvm -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm=build-llvm -fcoverage-prefix-map=/build/source/= -source-date-epoch 1677881322 -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-03-04-000721-16503-1 -x c++ /build/source/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Scalar/LoopInterchange.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Analysis/DependenceAnalysis.h"
21#include "llvm/Analysis/LoopCacheAnalysis.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/LoopNestAnalysis.h"
24#include "llvm/Analysis/LoopPass.h"
25#include "llvm/Analysis/OptimizationRemarkEmitter.h"
26#include "llvm/Analysis/ScalarEvolution.h"
27#include "llvm/Analysis/ScalarEvolutionExpressions.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/DiagnosticInfo.h"
31#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/IRBuilder.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/User.h"
38#include "llvm/IR/Value.h"
39#include "llvm/InitializePasses.h"
40#include "llvm/Pass.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/raw_ostream.h"
46#include "llvm/Transforms/Scalar.h"
47#include "llvm/Transforms/Scalar/LoopPassManager.h"
48#include "llvm/Transforms/Utils/BasicBlockUtils.h"
49#include "llvm/Transforms/Utils/LoopUtils.h"
50#include <cassert>
51#include <utility>
52#include <vector>
53
54using namespace llvm;
55
56#define DEBUG_TYPE"loop-interchange" "loop-interchange"
57
58STATISTIC(LoopsInterchanged, "Number of loops interchanged")static llvm::Statistic LoopsInterchanged = {"loop-interchange"
, "LoopsInterchanged", "Number of loops interchanged"}
;
59
60static cl::opt<int> LoopInterchangeCostThreshold(
61 "loop-interchange-threshold", cl::init(0), cl::Hidden,
62 cl::desc("Interchange if you gain more than this number"));
63
64namespace {
65
66using LoopVector = SmallVector<Loop *, 8>;
67
68// TODO: Check if we can use a sparse matrix here.
69using CharMatrix = std::vector<std::vector<char>>;
70
71} // end anonymous namespace
72
73// Maximum number of dependencies that can be handled in the dependency matrix.
74static const unsigned MaxMemInstrCount = 100;
75
76// Maximum loop depth supported.
77static const unsigned MaxLoopNestDepth = 10;
78
79#ifdef DUMP_DEP_MATRICIES
80static void printDepMatrix(CharMatrix &DepMatrix) {
81 for (auto &Row : DepMatrix) {
82 for (auto D : Row)
83 LLVM_DEBUG(dbgs() << D << " ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << D << " "; } } while
(false)
;
84 LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "\n"; } } while (false
)
;
85 }
86}
87#endif
88
89static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
90 Loop *L, DependenceInfo *DI,
91 ScalarEvolution *SE) {
92 using ValueVector = SmallVector<Value *, 16>;
93
94 ValueVector MemInstr;
95
96 // For each block.
97 for (BasicBlock *BB : L->blocks()) {
98 // Scan the BB and collect legal loads and stores.
99 for (Instruction &I : *BB) {
100 if (!isa<Instruction>(I))
101 return false;
102 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
103 if (!Ld->isSimple())
104 return false;
105 MemInstr.push_back(&I);
106 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
107 if (!St->isSimple())
108 return false;
109 MemInstr.push_back(&I);
110 }
111 }
112 }
113
114 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
115 << " Loads and Stores to analyze\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found " << MemInstr
.size() << " Loads and Stores to analyze\n"; } } while (
false)
;
116
117 ValueVector::iterator I, IE, J, JE;
118
119 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
120 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
121 std::vector<char> Dep;
122 Instruction *Src = cast<Instruction>(*I);
123 Instruction *Dst = cast<Instruction>(*J);
124 // Ignore Input dependencies.
125 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
126 continue;
127 // Track Output, Flow, and Anti dependencies.
128 if (auto D = DI->depends(Src, Dst, true)) {
129 assert(D->isOrdered() && "Expected an output, flow or anti dep.")(static_cast <bool> (D->isOrdered() && "Expected an output, flow or anti dep."
) ? void (0) : __assert_fail ("D->isOrdered() && \"Expected an output, flow or anti dep.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 129, __extension__
__PRETTY_FUNCTION__))
;
130 // If the direction vector is negative, normalize it to
131 // make it non-negative.
132 if (D->normalize(SE))
133 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Negative dependence vector normalized.\n"
; } } while (false)
;
134 LLVM_DEBUG(StringRef DepType =do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
135 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
136 dbgs() << "Found " << DepTypedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
137 << " dependency between Src and Dst\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
138 << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti" : "output"; dbgs() << "Found "
<< DepType << " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *
Dst << '\n'; } } while (false)
;
139 unsigned Levels = D->getLevels();
140 char Direction;
141 for (unsigned II = 1; II <= Levels; ++II) {
142 if (D->isScalar(II)) {
143 Direction = 'S';
144 Dep.push_back(Direction);
145 } else {
146 unsigned Dir = D->getDirection(II);
147 if (Dir == Dependence::DVEntry::LT ||
148 Dir == Dependence::DVEntry::LE)
149 Direction = '<';
150 else if (Dir == Dependence::DVEntry::GT ||
151 Dir == Dependence::DVEntry::GE)
152 Direction = '>';
153 else if (Dir == Dependence::DVEntry::EQ)
154 Direction = '=';
155 else
156 Direction = '*';
157 Dep.push_back(Direction);
158 }
159 }
160 while (Dep.size() != Level) {
161 Dep.push_back('I');
162 }
163
164 DepMatrix.push_back(Dep);
165 if (DepMatrix.size() > MaxMemInstrCount) {
166 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
167 << " dependencies inside loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle more than "
<< MaxMemInstrCount << " dependencies inside loop\n"
; } } while (false)
;
168 return false;
169 }
170 }
171 }
172 }
173
174 return true;
175}
176
177// A loop is moved from index 'from' to an index 'to'. Update the Dependence
178// matrix by exchanging the two columns.
179static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
180 unsigned ToIndx) {
181 for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
182 std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
183}
184
185// After interchanging, check if the direction vector is valid.
186// [Theorem] A permutation of the loops in a perfect nest is legal if and only
187// if the direction matrix, after the same permutation is applied to its
188// columns, has no ">" direction as the leftmost non-"=" direction in any row.
189static bool isLexicographicallyPositive(std::vector<char> &DV) {
190 for (unsigned Level = 0; Level < DV.size(); ++Level) {
191 unsigned char Direction = DV[Level];
192 if (Direction == '<')
193 return true;
194 if (Direction == '>' || Direction == '*')
195 return false;
196 }
197 return true;
198}
199
200// Checks if it is legal to interchange 2 loops.
201static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
202 unsigned InnerLoopId,
203 unsigned OuterLoopId) {
204 unsigned NumRows = DepMatrix.size();
205 std::vector<char> Cur;
206 // For each row check if it is valid to interchange.
207 for (unsigned Row = 0; Row < NumRows; ++Row) {
208 // Create temporary DepVector check its lexicographical order
209 // before and after swapping OuterLoop vs InnerLoop
210 Cur = DepMatrix[Row];
211 if (!isLexicographicallyPositive(Cur))
212 return false;
213 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
214 if (!isLexicographicallyPositive(Cur))
215 return false;
216 }
217 return true;
218}
219
220static void populateWorklist(Loop &L, LoopVector &LoopList) {
221 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
222 << L.getHeader()->getParent()->getName() << " Loop: %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
223 << L.getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() <<
" Loop: %" << L.getHeader()->getName() << '\n'
; } } while (false)
;
224 assert(LoopList.empty() && "LoopList should initially be empty!")(static_cast <bool> (LoopList.empty() && "LoopList should initially be empty!"
) ? void (0) : __assert_fail ("LoopList.empty() && \"LoopList should initially be empty!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 224, __extension__
__PRETTY_FUNCTION__))
;
225 Loop *CurrentLoop = &L;
226 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
227 while (!Vec->empty()) {
228 // The current loop has multiple subloops in it hence it is not tightly
229 // nested.
230 // Discard all loops above it added into Worklist.
231 if (Vec->size() != 1) {
232 LoopList = {};
233 return;
234 }
235
236 LoopList.push_back(CurrentLoop);
237 CurrentLoop = Vec->front();
238 Vec = &CurrentLoop->getSubLoops();
239 }
240 LoopList.push_back(CurrentLoop);
241}
242
243namespace {
244
245/// LoopInterchangeLegality checks if it is legal to interchange the loop.
246class LoopInterchangeLegality {
247public:
248 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
249 OptimizationRemarkEmitter *ORE)
250 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
251
252 /// Check if the loops can be interchanged.
253 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
254 CharMatrix &DepMatrix);
255
256 /// Discover induction PHIs in the header of \p L. Induction
257 /// PHIs are added to \p Inductions.
258 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
259
260 /// Check if the loop structure is understood. We do not handle triangular
261 /// loops for now.
262 bool isLoopStructureUnderstood();
263
264 bool currentLimitations();
265
266 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
267 return OuterInnerReductions;
268 }
269
270 const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
271 return InnerLoopInductions;
272 }
273
274private:
275 bool tightlyNested(Loop *Outer, Loop *Inner);
276 bool containsUnsafeInstructions(BasicBlock *BB);
277
278 /// Discover induction and reduction PHIs in the header of \p L. Induction
279 /// PHIs are added to \p Inductions, reductions are added to
280 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
281 /// to be passed as \p InnerLoop.
282 bool findInductionAndReductions(Loop *L,
283 SmallVector<PHINode *, 8> &Inductions,
284 Loop *InnerLoop);
285
286 Loop *OuterLoop;
287 Loop *InnerLoop;
288
289 ScalarEvolution *SE;
290
291 /// Interface to emit optimization remarks.
292 OptimizationRemarkEmitter *ORE;
293
294 /// Set of reduction PHIs taking part of a reduction across the inner and
295 /// outer loop.
296 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
297
298 /// Set of inner loop induction PHIs
299 SmallVector<PHINode *, 8> InnerLoopInductions;
300};
301
302/// LoopInterchangeProfitability checks if it is profitable to interchange the
303/// loop.
304class LoopInterchangeProfitability {
305public:
306 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
307 OptimizationRemarkEmitter *ORE)
308 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
309
310 /// Check if the loop interchange is profitable.
311 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
312 unsigned InnerLoopId, unsigned OuterLoopId,
313 CharMatrix &DepMatrix,
314 const DenseMap<const Loop *, unsigned> &CostMap,
315 std::unique_ptr<CacheCost> &CC);
316
317private:
318 int getInstrOrderCost();
319 std::optional<bool> isProfitablePerLoopCacheAnalysis(
320 const DenseMap<const Loop *, unsigned> &CostMap,
321 std::unique_ptr<CacheCost> &CC);
322 std::optional<bool> isProfitablePerInstrOrderCost();
323 std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
324 unsigned OuterLoopId,
325 CharMatrix &DepMatrix);
326 Loop *OuterLoop;
327 Loop *InnerLoop;
328
329 /// Scev analysis.
330 ScalarEvolution *SE;
331
332 /// Interface to emit optimization remarks.
333 OptimizationRemarkEmitter *ORE;
334};
335
336/// LoopInterchangeTransform interchanges the loop.
337class LoopInterchangeTransform {
338public:
339 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
340 LoopInfo *LI, DominatorTree *DT,
341 const LoopInterchangeLegality &LIL)
342 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
343
344 /// Interchange OuterLoop and InnerLoop.
345 bool transform();
346 void restructureLoops(Loop *NewInner, Loop *NewOuter,
347 BasicBlock *OrigInnerPreHeader,
348 BasicBlock *OrigOuterPreHeader);
349 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
350
351private:
352 bool adjustLoopLinks();
353 bool adjustLoopBranches();
354
355 Loop *OuterLoop;
356 Loop *InnerLoop;
357
358 /// Scev analysis.
359 ScalarEvolution *SE;
360
361 LoopInfo *LI;
362 DominatorTree *DT;
363
364 const LoopInterchangeLegality &LIL;
365};
366
367struct LoopInterchange {
368 ScalarEvolution *SE = nullptr;
369 LoopInfo *LI = nullptr;
370 DependenceInfo *DI = nullptr;
371 DominatorTree *DT = nullptr;
372 std::unique_ptr<CacheCost> CC = nullptr;
373
374 /// Interface to emit optimization remarks.
375 OptimizationRemarkEmitter *ORE;
376
377 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
378 DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
379 OptimizationRemarkEmitter *ORE)
380 : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
381
382 bool run(Loop *L) {
383 if (L->getParentLoop())
384 return false;
385 SmallVector<Loop *, 8> LoopList;
386 populateWorklist(*L, LoopList);
387 return processLoopList(LoopList);
388 }
389
390 bool run(LoopNest &LN) {
391 SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
392 for (unsigned I = 1; I < LoopList.size(); ++I)
393 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
394 return false;
395 return processLoopList(LoopList);
396 }
397
398 bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
399 for (Loop *L : LoopList) {
400 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
401 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
402 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Couldn't compute backedge count\n"
; } } while (false)
;
403 return false;
404 }
405 if (L->getNumBackEdges() != 1) {
406 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "NumBackEdges is not equal to 1\n"
; } } while (false)
;
407 return false;
408 }
409 if (!L->getExitingBlock()) {
410 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't have unique exit block\n"
; } } while (false)
;
411 return false;
412 }
413 }
414 return true;
415 }
416
417 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
418 // TODO: Add a better heuristic to select the loop to be interchanged based
419 // on the dependence matrix. Currently we select the innermost loop.
420 return LoopList.size() - 1;
421 }
422
423 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
424 bool Changed = false;
425 unsigned LoopNestDepth = LoopList.size();
426 if (LoopNestDepth < 2) {
427 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop doesn't contain minimum nesting level.\n"
; } } while (false)
;
428 return false;
429 }
430 if (LoopNestDepth > MaxLoopNestDepth) {
431 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
432 << MaxLoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n"; } } while (false)
;
433 return false;
434 }
435 if (!isComputableLoopNest(LoopList)) {
436 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not valid loop candidate for interchange\n"
; } } while (false)
;
437 return false;
438 }
439
440 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepthdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
441 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing LoopList of size = "
<< LoopNestDepth << "\n"; } } while (false)
;
442
443 CharMatrix DependencyMatrix;
444 Loop *OuterMostLoop = *(LoopList.begin());
445 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
446 OuterMostLoop, DI, SE)) {
447 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Populating dependency matrix failed\n"
; } } while (false)
;
448 return false;
449 }
450#ifdef DUMP_DEP_MATRICIES
451 LLVM_DEBUG(dbgs() << "Dependence before interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence before interchange\n"
; } } while (false)
;
452 printDepMatrix(DependencyMatrix);
453#endif
454
455 // Get the Outermost loop exit.
456 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
457 if (!LoopNestExit) {
458 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "OuterMostLoop needs an unique exit block"
; } } while (false)
;
459 return false;
460 }
461
462 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
463 // Obtain the loop vector returned from loop cache analysis beforehand,
464 // and put each <Loop, index> pair into a map for constant time query
465 // later. Indices in loop vector reprsent the optimal order of the
466 // corresponding loop, e.g., given a loopnest with depth N, index 0
467 // indicates the loop should be placed as the outermost loop and index N
468 // indicates the loop should be placed as the innermost loop.
469 //
470 // For the old pass manager CacheCost would be null.
471 DenseMap<const Loop *, unsigned> CostMap;
472 if (CC != nullptr) {
473 const auto &LoopCosts = CC->getLoopCosts();
474 for (unsigned i = 0; i < LoopCosts.size(); i++) {
475 CostMap[LoopCosts[i].first] = i;
476 }
477 }
478 // We try to achieve the globally optimal memory access for the loopnest,
479 // and do interchange based on a bubble-sort fasion. We start from
480 // the innermost loop, move it outwards to the best possible position
481 // and repeat this process.
482 for (unsigned j = SelecLoopId; j > 0; j--) {
483 bool ChangedPerIter = false;
484 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
485 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
486 DependencyMatrix, CostMap);
487 if (!Interchanged)
488 continue;
489 // Loops interchanged, update LoopList accordingly.
490 std::swap(LoopList[i - 1], LoopList[i]);
491 // Update the DependencyMatrix
492 interChangeDependencies(DependencyMatrix, i, i - 1);
493#ifdef DUMP_DEP_MATRICIES
494 LLVM_DEBUG(dbgs() << "Dependence after interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Dependence after interchange\n"
; } } while (false)
;
495 printDepMatrix(DependencyMatrix);
496#endif
497 ChangedPerIter |= Interchanged;
498 Changed |= Interchanged;
499 }
500 // Early abort if there was no interchange during an entire round of
501 // moving loops outwards.
502 if (!ChangedPerIter)
503 break;
504 }
505 return Changed;
506 }
507
508 bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
509 unsigned OuterLoopId,
510 std::vector<std::vector<char>> &DependencyMatrix,
511 const DenseMap<const Loop *, unsigned> &CostMap) {
512 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
513 << " and OuterLoopId = " << OuterLoopId << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Processing InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << "\n"; } } while (false)
;
514 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
515 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
516 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not interchanging loops. Cannot prove legality.\n"
; } } while (false)
;
517 return false;
518 }
519 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are legal to interchange\n"
; } } while (false)
;
520 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
521 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
522 DependencyMatrix, CostMap, CC)) {
523 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Interchanging loops not profitable.\n"
; } } while (false)
;
524 return false;
525 }
526
527 ORE->emit([&]() {
528 return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged",
529 InnerLoop->getStartLoc(),
530 InnerLoop->getHeader())
531 << "Loop interchanged with enclosing loop.";
532 });
533
534 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
535 LIT.transform();
536 LLVM_DEBUG(dbgs() << "Loops interchanged.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops interchanged.\n"
; } } while (false)
;
537 LoopsInterchanged++;
538
539 llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE);
540 return true;
541 }
542};
543
544} // end anonymous namespace
545
546bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
547 return any_of(*BB, [](const Instruction &I) {
548 return I.mayHaveSideEffects() || I.mayReadFromMemory();
549 });
550}
551
552bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
553 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
554 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
555 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
556
557 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking if loops are tightly nested\n"
; } } while (false)
;
558
559 // A perfectly nested loop will not have any branch in between the outer and
560 // inner block i.e. outer header will branch to either inner preheader and
561 // outerloop latch.
562 BranchInst *OuterLoopHeaderBI =
563 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
564 if (!OuterLoopHeaderBI)
565 return false;
566
567 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
568 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
569 Succ != OuterLoopLatch)
570 return false;
571
572 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Checking instructions in Loop header and Loop latch\n"
; } } while (false)
;
573 // We do not have any basic block in between now make sure the outer header
574 // and outer loop latch doesn't contain any unsafe instructions.
575 if (containsUnsafeInstructions(OuterLoopHeader) ||
576 containsUnsafeInstructions(OuterLoopLatch))
577 return false;
578
579 // Also make sure the inner loop preheader does not contain any unsafe
580 // instructions. Note that all instructions in the preheader will be moved to
581 // the outer loop header when interchanging.
582 if (InnerLoopPreHeader != OuterLoopHeader &&
583 containsUnsafeInstructions(InnerLoopPreHeader))
584 return false;
585
586 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
587 // Ensure the inner loop exit block flows to the outer loop latch possibly
588 // through empty blocks.
589 const BasicBlock &SuccInner =
590 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
591 if (&SuccInner != OuterLoopLatch) {
592 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExitdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
593 << " does not lead to the outer loop latch.\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop exit block "
<< *InnerLoopExit << " does not lead to the outer loop latch.\n"
;; } } while (false)
;
594 return false;
595 }
596 // The inner loop exit block does flow to the outer loop latch and not some
597 // other BBs, now make sure it contains safe instructions, since it will be
598 // moved into the (new) inner loop after interchange.
599 if (containsUnsafeInstructions(InnerLoopExit))
600 return false;
601
602 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops are perfectly nested\n"
; } } while (false)
;
603 // We have a perfect loop nest.
604 return true;
605}
606
607bool LoopInterchangeLegality::isLoopStructureUnderstood() {
608 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
609 for (PHINode *InnerInduction : InnerLoopInductions) {
610 unsigned Num = InnerInduction->getNumOperands();
611 for (unsigned i = 0; i < Num; ++i) {
612 Value *Val = InnerInduction->getOperand(i);
613 if (isa<Constant>(Val))
614 continue;
615 Instruction *I = dyn_cast<Instruction>(Val);
616 if (!I)
617 return false;
618 // TODO: Handle triangular loops.
619 // e.g. for(int i=0;i<N;i++)
620 // for(int j=i;j<N;j++)
621 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
622 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
623 InnerLoopPreheader &&
624 !OuterLoop->isLoopInvariant(I)) {
625 return false;
626 }
627 }
628 }
629
630 // TODO: Handle triangular loops of another form.
631 // e.g. for(int i=0;i<N;i++)
632 // for(int j=0;j<i;j++)
633 // or,
634 // for(int i=0;i<N;i++)
635 // for(int j=0;j*i<N;j++)
636 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
637 BranchInst *InnerLoopLatchBI =
638 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
639 if (!InnerLoopLatchBI->isConditional())
640 return false;
641 if (CmpInst *InnerLoopCmp =
642 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
643 Value *Op0 = InnerLoopCmp->getOperand(0);
644 Value *Op1 = InnerLoopCmp->getOperand(1);
645
646 // LHS and RHS of the inner loop exit condition, e.g.,
647 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
648 Value *Left = nullptr;
649 Value *Right = nullptr;
650
651 // Check if V only involves inner loop induction variable.
652 // Return true if V is InnerInduction, or a cast from
653 // InnerInduction, or a binary operator that involves
654 // InnerInduction and a constant.
655 std::function<bool(Value *)> IsPathToInnerIndVar;
656 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
657 if (llvm::is_contained(InnerLoopInductions, V))
658 return true;
659 if (isa<Constant>(V))
660 return true;
661 const Instruction *I = dyn_cast<Instruction>(V);
662 if (!I)
663 return false;
664 if (isa<CastInst>(I))
665 return IsPathToInnerIndVar(I->getOperand(0));
666 if (isa<BinaryOperator>(I))
667 return IsPathToInnerIndVar(I->getOperand(0)) &&
668 IsPathToInnerIndVar(I->getOperand(1));
669 return false;
670 };
671
672 // In case of multiple inner loop indvars, it is okay if LHS and RHS
673 // are both inner indvar related variables.
674 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
675 return true;
676
677 // Otherwise we check if the cmp instruction compares an inner indvar
678 // related variable (Left) with a outer loop invariant (Right).
679 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
680 Left = Op0;
681 Right = Op1;
682 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
683 Left = Op1;
684 Right = Op0;
685 }
686
687 if (Left == nullptr)
688 return false;
689
690 const SCEV *S = SE->getSCEV(Right);
691 if (!SE->isLoopInvariant(S, OuterLoop))
692 return false;
693 }
694
695 return true;
696}
697
698// If SV is a LCSSA PHI node with a single incoming value, return the incoming
699// value.
700static Value *followLCSSA(Value *SV) {
701 PHINode *PHI = dyn_cast<PHINode>(SV);
702 if (!PHI)
703 return SV;
704
705 if (PHI->getNumIncomingValues() != 1)
706 return SV;
707 return followLCSSA(PHI->getIncomingValue(0));
708}
709
710// Check V's users to see if it is involved in a reduction in L.
711static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
712 // Reduction variables cannot be constants.
713 if (isa<Constant>(V))
714 return nullptr;
715
716 for (Value *User : V->users()) {
717 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
718 if (PHI->getNumIncomingValues() == 1)
719 continue;
720 RecurrenceDescriptor RD;
721 if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
722 // Detect floating point reduction only when it can be reordered.
723 if (RD.getExactFPMathInst() != nullptr)
724 return nullptr;
725 return PHI;
726 }
727 return nullptr;
728 }
729 }
730
731 return nullptr;
732}
733
734bool LoopInterchangeLegality::findInductionAndReductions(
735 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
736 if (!L->getLoopLatch() || !L->getLoopPredecessor())
737 return false;
738 for (PHINode &PHI : L->getHeader()->phis()) {
739 RecurrenceDescriptor RD;
740 InductionDescriptor ID;
741 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
742 Inductions.push_back(&PHI);
743 else {
744 // PHIs in inner loops need to be part of a reduction in the outer loop,
745 // discovered when checking the PHIs of the outer loop earlier.
746 if (!InnerLoop) {
747 if (!OuterInnerReductions.count(&PHI)) {
748 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
749 "across the outer loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop PHI is not part of reductions "
"across the outer loop.\n"; } } while (false)
;
750 return false;
751 }
752 } else {
753 assert(PHI.getNumIncomingValues() == 2 &&(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 754, __extension__
__PRETTY_FUNCTION__))
754 "Phis in loop header should have exactly 2 incoming values")(static_cast <bool> (PHI.getNumIncomingValues() == 2 &&
"Phis in loop header should have exactly 2 incoming values")
? void (0) : __assert_fail ("PHI.getNumIncomingValues() == 2 && \"Phis in loop header should have exactly 2 incoming values\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 754, __extension__
__PRETTY_FUNCTION__))
;
755 // Check if we have a PHI node in the outer loop that has a reduction
756 // result from the inner loop as an incoming value.
757 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
758 PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
759 if (!InnerRedPhi ||
760 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
761 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
762 dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
763 << "Failed to recognize PHI as an induction or reduction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n"
; } } while (false)
;
764 return false;
765 }
766 OuterInnerReductions.insert(&PHI);
767 OuterInnerReductions.insert(InnerRedPhi);
768 }
769 }
770 }
771 return true;
772}
773
774// This function indicates the current limitations in the transform as a result
775// of which we do not proceed.
776bool LoopInterchangeLegality::currentLimitations() {
777 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
778
779 // transform currently expects the loop latches to also be the exiting
780 // blocks.
781 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
782 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
783 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
784 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
785 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
786 dbgs() << "Loops where the latch is not the exiting block are not"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
787 << " supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not"
<< " supported currently.\n"; } } while (false)
;
788 ORE->emit([&]() {
789 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ExitingNotLatch",
790 OuterLoop->getStartLoc(),
791 OuterLoop->getHeader())
792 << "Loops where the latch is not the exiting block cannot be"
793 " interchange currently.";
794 });
795 return true;
796 }
797
798 SmallVector<PHINode *, 8> Inductions;
799 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
800 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
801 dbgs() << "Only outer loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
802 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
803 ORE->emit([&]() {
804 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter",
805 OuterLoop->getStartLoc(),
806 OuterLoop->getHeader())
807 << "Only outer loops with induction or reduction PHI nodes can be"
808 " interchanged currently.";
809 });
810 return true;
811 }
812
813 Inductions.clear();
814 // For multi-level loop nests, make sure that all phi nodes for inner loops
815 // at all levels can be recognized as a induction or reduction phi. Bail out
816 // if a phi node at a certain nesting level cannot be properly recognized.
817 Loop *CurLevelLoop = OuterLoop;
818 while (!CurLevelLoop->getSubLoops().empty()) {
819 // We already made sure that the loop nest is tightly nested.
820 CurLevelLoop = CurLevelLoop->getSubLoops().front();
821 if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
822 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
823 dbgs() << "Only inner loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
824 << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n"; } } while (false)
;
825 ORE->emit([&]() {
826 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner",
827 CurLevelLoop->getStartLoc(),
828 CurLevelLoop->getHeader())
829 << "Only inner loops with induction or reduction PHI nodes can be"
830 " interchange currently.";
831 });
832 return true;
833 }
834 }
835
836 // TODO: Triangular loops are not handled for now.
837 if (!isLoopStructureUnderstood()) {
838 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loop structure not understood by pass\n"
; } } while (false)
;
839 ORE->emit([&]() {
840 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner",
841 InnerLoop->getStartLoc(),
842 InnerLoop->getHeader())
843 << "Inner loop structure not understood currently.";
844 });
845 return true;
846 }
847
848 return false;
849}
850
851bool LoopInterchangeLegality::findInductions(
852 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
853 for (PHINode &PHI : L->getHeader()->phis()) {
854 InductionDescriptor ID;
855 if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
856 Inductions.push_back(&PHI);
857 }
858 return !Inductions.empty();
859}
860
861// We currently only support LCSSA PHI nodes in the inner loop exit, if their
862// users are either reduction PHIs or PHIs outside the outer loop (which means
863// the we are only interested in the final value after the loop).
864static bool
865areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
866 SmallPtrSetImpl<PHINode *> &Reductions) {
867 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
868 for (PHINode &PHI : InnerExit->phis()) {
869 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
870 if (PHI.getNumIncomingValues() > 1)
871 return false;
872 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
873 PHINode *PN = dyn_cast<PHINode>(U);
874 return !PN ||
875 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
876 })) {
877 return false;
878 }
879 }
880 return true;
881}
882
883// We currently support LCSSA PHI nodes in the outer loop exit, if their
884// incoming values do not come from the outer loop latch or if the
885// outer loop latch has a single predecessor. In that case, the value will
886// be available if both the inner and outer loop conditions are true, which
887// will still be true after interchanging. If we have multiple predecessor,
888// that may not be the case, e.g. because the outer loop latch may be executed
889// if the inner loop is not executed.
890static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
891 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
892 for (PHINode &PHI : LoopNestExit->phis()) {
893 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
894 Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
895 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
896 continue;
897
898 // The incoming value is defined in the outer loop latch. Currently we
899 // only support that in case the outer loop latch has a single predecessor.
900 // This guarantees that the outer loop latch is executed if and only if
901 // the inner loop is executed (because tightlyNested() guarantees that the
902 // outer loop header only branches to the inner loop or the outer loop
903 // latch).
904 // FIXME: We could weaken this logic and allow multiple predecessors,
905 // if the values are produced outside the loop latch. We would need
906 // additional logic to update the PHI nodes in the exit block as
907 // well.
908 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
909 return false;
910 }
911 }
912 return true;
913}
914
915// In case of multi-level nested loops, it may occur that lcssa phis exist in
916// the latch of InnerLoop, i.e., when defs of the incoming values are further
917// inside the loopnest. Sometimes those incoming values are not available
918// after interchange, since the original inner latch will become the new outer
919// latch which may have predecessor paths that do not include those incoming
920// values.
921// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
922// multi-level loop nests.
923static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
924 if (InnerLoop->getSubLoops().empty())
925 return true;
926 // If the original outer latch has only one predecessor, then values defined
927 // further inside the looploop, e.g., in the innermost loop, will be available
928 // at the new outer latch after interchange.
929 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
930 return true;
931
932 // The outer latch has more than one predecessors, i.e., the inner
933 // exit and the inner header.
934 // PHI nodes in the inner latch are lcssa phis where the incoming values
935 // are defined further inside the loopnest. Check if those phis are used
936 // in the original inner latch. If that is the case then bail out since
937 // those incoming values may not be available at the new outer latch.
938 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
939 for (PHINode &PHI : InnerLoopLatch->phis()) {
940 for (auto *U : PHI.users()) {
941 Instruction *UI = cast<Instruction>(U);
942 if (InnerLoopLatch == UI->getParent())
943 return false;
944 }
945 }
946 return true;
947}
948
949bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
950 unsigned OuterLoopId,
951 CharMatrix &DepMatrix) {
952 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
953 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
954 << " and OuterLoopId = " << OuterLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
955 << " due to dependence\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = "
<< InnerLoopId << " and OuterLoopId = " <<
OuterLoopId << " due to dependence\n"; } } while (false
)
;
956 ORE->emit([&]() {
957 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence",
958 InnerLoop->getStartLoc(),
959 InnerLoop->getHeader())
960 << "Cannot interchange loops due to dependences.";
961 });
962 return false;
963 }
964 // Check if outer and inner loop contain legal instructions only.
965 for (auto *BB : OuterLoop->blocks())
966 for (Instruction &I : BB->instructionsWithoutDebug())
967 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
968 // readnone functions do not prevent interchanging.
969 if (CI->onlyWritesMemory())
970 continue;
971 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
972 dbgs() << "Loops with call instructions cannot be interchanged "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
973 << "safely.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged "
<< "safely."; } } while (false)
;
974 ORE->emit([&]() {
975 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "CallInst",
976 CI->getDebugLoc(),
977 CI->getParent())
978 << "Cannot interchange loops due to call instruction.";
979 });
980
981 return false;
982 }
983
984 if (!findInductions(InnerLoop, InnerLoopInductions)) {
985 LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cound not find inner loop induction variables.\n"
; } } while (false)
;
986 return false;
987 }
988
989 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
990 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"
; } } while (false)
;
991 ORE->emit([&]() {
992 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedInnerLatchPHI",
993 InnerLoop->getStartLoc(),
994 InnerLoop->getHeader())
995 << "Cannot interchange loops because unsupported PHI nodes found "
996 "in inner loop latch.";
997 });
998 return false;
999 }
1000
1001 // TODO: The loops could not be interchanged due to current limitations in the
1002 // transform module.
1003 if (currentLimitations()) {
1004 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Not legal because of current transform limitation\n"
; } } while (false)
;
1005 return false;
1006 }
1007
1008 // Check if the loops are tightly nested.
1009 if (!tightlyNested(OuterLoop, InnerLoop)) {
1010 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Loops not tightly nested\n"
; } } while (false)
;
1011 ORE->emit([&]() {
1012 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested",
1013 InnerLoop->getStartLoc(),
1014 InnerLoop->getHeader())
1015 << "Cannot interchange loops because they are not tightly "
1016 "nested.";
1017 });
1018 return false;
1019 }
1020
1021 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1022 OuterInnerReductions)) {
1023 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"
; } } while (false)
;
1024 ORE->emit([&]() {
1025 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1026 InnerLoop->getStartLoc(),
1027 InnerLoop->getHeader())
1028 << "Found unsupported PHI node in loop exit.";
1029 });
1030 return false;
1031 }
1032
1033 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1034 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"
; } } while (false)
;
1035 ORE->emit([&]() {
1036 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI",
1037 OuterLoop->getStartLoc(),
1038 OuterLoop->getHeader())
1039 << "Found unsupported PHI node in loop exit.";
1040 });
1041 return false;
1042 }
1043
1044 return true;
1045}
1046
1047int LoopInterchangeProfitability::getInstrOrderCost() {
1048 unsigned GoodOrder, BadOrder;
1049 BadOrder = GoodOrder = 0;
1050 for (BasicBlock *BB : InnerLoop->blocks()) {
1051 for (Instruction &Ins : *BB) {
1052 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1053 unsigned NumOp = GEP->getNumOperands();
1054 bool FoundInnerInduction = false;
1055 bool FoundOuterInduction = false;
1056 for (unsigned i = 0; i < NumOp; ++i) {
1057 // Skip operands that are not SCEV-able.
1058 if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1059 continue;
1060
1061 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1062 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1063 if (!AR)
1064 continue;
1065
1066 // If we find the inner induction after an outer induction e.g.
1067 // for(int i=0;i<N;i++)
1068 // for(int j=0;j<N;j++)
1069 // A[i][j] = A[i-1][j-1]+k;
1070 // then it is a good order.
1071 if (AR->getLoop() == InnerLoop) {
1072 // We found an InnerLoop induction after OuterLoop induction. It is
1073 // a good order.
1074 FoundInnerInduction = true;
1075 if (FoundOuterInduction) {
1076 GoodOrder++;
1077 break;
1078 }
1079 }
1080 // If we find the outer induction after an inner induction e.g.
1081 // for(int i=0;i<N;i++)
1082 // for(int j=0;j<N;j++)
1083 // A[j][i] = A[j-1][i-1]+k;
1084 // then it is a bad order.
1085 if (AR->getLoop() == OuterLoop) {
1086 // We found an OuterLoop induction after InnerLoop induction. It is
1087 // a bad order.
1088 FoundOuterInduction = true;
1089 if (FoundInnerInduction) {
1090 BadOrder++;
1091 break;
1092 }
1093 }
1094 }
1095 }
1096 }
1097 }
1098 return GoodOrder - BadOrder;
1099}
1100
1101std::optional<bool>
1102LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1103 const DenseMap<const Loop *, unsigned> &CostMap,
1104 std::unique_ptr<CacheCost> &CC) {
1105 // This is the new cost model returned from loop cache analysis.
1106 // A smaller index means the loop should be placed an outer loop, and vice
1107 // versa.
1108 if (CostMap.find(InnerLoop) != CostMap.end() &&
1109 CostMap.find(OuterLoop) != CostMap.end()) {
1110 unsigned InnerIndex = 0, OuterIndex = 0;
1111 InnerIndex = CostMap.find(InnerLoop)->second;
1112 OuterIndex = CostMap.find(OuterLoop)->second;
1113 LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndexdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "InnerIndex = " <<
InnerIndex << ", OuterIndex = " << OuterIndex <<
"\n"; } } while (false)
1114 << ", OuterIndex = " << OuterIndex << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "InnerIndex = " <<
InnerIndex << ", OuterIndex = " << OuterIndex <<
"\n"; } } while (false)
;
1115 if (InnerIndex < OuterIndex)
1116 return std::optional<bool>(true);
1117 assert(InnerIndex != OuterIndex && "CostMap should assign unique "(static_cast <bool> (InnerIndex != OuterIndex &&
"CostMap should assign unique " "numbers to each loop") ? void
(0) : __assert_fail ("InnerIndex != OuterIndex && \"CostMap should assign unique \" \"numbers to each loop\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1118, __extension__
__PRETTY_FUNCTION__))
1118 "numbers to each loop")(static_cast <bool> (InnerIndex != OuterIndex &&
"CostMap should assign unique " "numbers to each loop") ? void
(0) : __assert_fail ("InnerIndex != OuterIndex && \"CostMap should assign unique \" \"numbers to each loop\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1118, __extension__
__PRETTY_FUNCTION__))
;
1119 if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop))
1120 return std::nullopt;
1121 return std::optional<bool>(false);
1122 }
1123 return std::nullopt;
1124}
1125
1126std::optional<bool>
1127LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1128 // Legacy cost model: this is rough cost estimation algorithm. It counts the
1129 // good and bad order of induction variables in the instruction and allows
1130 // reordering if number of bad orders is more than good.
1131 int Cost = getInstrOrderCost();
1132 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Cost = " << Cost
<< "\n"; } } while (false)
;
1133 if (Cost < 0 && Cost < LoopInterchangeCostThreshold)
1134 return std::optional<bool>(true);
1135
1136 return std::nullopt;
1137}
1138
1139std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1140 unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1141 for (auto &Row : DepMatrix) {
1142 // If the inner loop is loop independent or doesn't carry any dependency
1143 // it is not profitable to move this to outer position, since we are
1144 // likely able to do inner loop vectorization already.
1145 if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=')
1146 return std::optional<bool>(false);
1147
1148 // If the outer loop is not loop independent it is not profitable to move
1149 // this to inner position, since doing so would not enable inner loop
1150 // parallelism.
1151 if (Row[OuterLoopId] != 'I' && Row[OuterLoopId] != '=')
1152 return std::optional<bool>(false);
1153 }
1154 // If inner loop has dependence and outer loop is loop independent then it
1155 // is/ profitable to interchange to enable inner loop parallelism.
1156 // If there are no dependences, interchanging will not improve anything.
1157 return std::optional<bool>(!DepMatrix.empty());
1158}
1159
1160bool LoopInterchangeProfitability::isProfitable(
1161 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1162 unsigned OuterLoopId, CharMatrix &DepMatrix,
1163 const DenseMap<const Loop *, unsigned> &CostMap,
1164 std::unique_ptr<CacheCost> &CC) {
1165 // isProfitable() is structured to avoid endless loop interchange.
1166 // If loop cache analysis could decide the profitability then,
1167 // profitability check will stop and return the analysis result.
1168 // If cache analysis failed to analyze the loopnest (e.g.,
1169 // due to delinearization issues) then only check whether it is
1170 // profitable for InstrOrderCost. Likewise, if InstrOrderCost failed to
1171 // analysis the profitability then only, isProfitableForVectorization
1172 // will decide.
1173 std::optional<bool> shouldInterchange =
1174 isProfitablePerLoopCacheAnalysis(CostMap, CC);
1175 if (!shouldInterchange.has_value()) {
1176 shouldInterchange = isProfitablePerInstrOrderCost();
1177 if (!shouldInterchange.has_value())
1178 shouldInterchange =
1179 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1180 }
1181 if (!shouldInterchange.has_value()) {
1182 ORE->emit([&]() {
1183 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable",
1184 InnerLoop->getStartLoc(),
1185 InnerLoop->getHeader())
1186 << "Insufficient information to calculate the cost of loop for "
1187 "interchange.";
1188 });
1189 return false;
1190 } else if (!shouldInterchange.value()) {
1191 ORE->emit([&]() {
1192 return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable",
1193 InnerLoop->getStartLoc(),
1194 InnerLoop->getHeader())
1195 << "Interchanging loops is not considered to improve cache "
1196 "locality nor vectorization.";
1197 });
1198 return false;
1199 }
1200 return true;
1201}
1202
1203void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1204 Loop *InnerLoop) {
1205 for (Loop *L : *OuterLoop)
1206 if (L == InnerLoop) {
1207 OuterLoop->removeChildLoop(L);
1208 return;
1209 }
1210 llvm_unreachable("Couldn't find loop")::llvm::llvm_unreachable_internal("Couldn't find loop", "llvm/lib/Transforms/Scalar/LoopInterchange.cpp"
, 1210)
;
1211}
1212
1213/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1214/// new inner and outer loop after interchanging: NewInner is the original
1215/// outer loop and NewOuter is the original inner loop.
1216///
1217/// Before interchanging, we have the following structure
1218/// Outer preheader
1219// Outer header
1220// Inner preheader
1221// Inner header
1222// Inner body
1223// Inner latch
1224// outer bbs
1225// Outer latch
1226//
1227// After interchanging:
1228// Inner preheader
1229// Inner header
1230// Outer preheader
1231// Outer header
1232// Inner body
1233// outer bbs
1234// Outer latch
1235// Inner latch
1236void LoopInterchangeTransform::restructureLoops(
1237 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1238 BasicBlock *OrigOuterPreHeader) {
1239 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1240 // The original inner loop preheader moves from the new inner loop to
1241 // the parent loop, if there is one.
1242 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1243 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1244
1245 // Switch the loop levels.
1246 if (OuterLoopParent) {
1247 // Remove the loop from its parent loop.
1248 removeChildLoop(OuterLoopParent, NewInner);
1249 removeChildLoop(NewInner, NewOuter);
1250 OuterLoopParent->addChildLoop(NewOuter);
1251 } else {
1252 removeChildLoop(NewInner, NewOuter);
1253 LI->changeTopLevelLoop(NewInner, NewOuter);
1254 }
1255 while (!NewOuter->isInnermost())
1256 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1257 NewOuter->addChildLoop(NewInner);
1258
1259 // BBs from the original inner loop.
1260 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1261
1262 // Add BBs from the original outer loop to the original inner loop (excluding
1263 // BBs already in inner loop)
1264 for (BasicBlock *BB : NewInner->blocks())
1265 if (LI->getLoopFor(BB) == NewInner)
1266 NewOuter->addBlockEntry(BB);
1267
1268 // Now remove inner loop header and latch from the new inner loop and move
1269 // other BBs (the loop body) to the new inner loop.
1270 BasicBlock *OuterHeader = NewOuter->getHeader();
1271 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1272 for (BasicBlock *BB : OrigInnerBBs) {
1273 // Nothing will change for BBs in child loops.
1274 if (LI->getLoopFor(BB) != NewOuter)
1275 continue;
1276 // Remove the new outer loop header and latch from the new inner loop.
1277 if (BB == OuterHeader || BB == OuterLatch)
1278 NewInner->removeBlockFromLoop(BB);
1279 else
1280 LI->changeLoopFor(BB, NewInner);
1281 }
1282
1283 // The preheader of the original outer loop becomes part of the new
1284 // outer loop.
1285 NewOuter->addBlockEntry(OrigOuterPreHeader);
1286 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1287
1288 // Tell SE that we move the loops around.
1289 SE->forgetLoop(NewOuter);
1290}
1291
1292bool LoopInterchangeTransform::transform() {
1293 bool Transformed = false;
1294
1295 if (InnerLoop->getSubLoops().empty()) {
1296 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1297 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Splitting the inner loop latch\n"
; } } while (false)
;
1298 auto &InductionPHIs = LIL.getInnerLoopInductions();
1299 if (InductionPHIs.empty()) {
1300 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Failed to find the point to split loop latch \n"
; } } while (false)
;
1301 return false;
1302 }
1303
1304 SmallVector<Instruction *, 8> InnerIndexVarList;
1305 for (PHINode *CurInductionPHI : InductionPHIs) {
1306 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1307 InnerIndexVarList.push_back(
1308 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1309 else
1310 InnerIndexVarList.push_back(
1311 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1312 }
1313
1314 // Create a new latch block for the inner loop. We split at the
1315 // current latch's terminator and then move the condition and all
1316 // operands that are not either loop-invariant or the induction PHI into the
1317 // new latch block.
1318 BasicBlock *NewLatch =
1319 SplitBlock(InnerLoop->getLoopLatch(),
1320 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1321
1322 SmallSetVector<Instruction *, 4> WorkList;
1323 unsigned i = 0;
1324 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1325 for (; i < WorkList.size(); i++) {
1326 // Duplicate instruction and move it the new latch. Update uses that
1327 // have been moved.
1328 Instruction *NewI = WorkList[i]->clone();
1329 NewI->insertBefore(NewLatch->getFirstNonPHI());
1330 assert(!NewI->mayHaveSideEffects() &&(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1332, __extension__
__PRETTY_FUNCTION__))
1331 "Moving instructions with side-effects may change behavior of "(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1332, __extension__
__PRETTY_FUNCTION__))
1332 "the loop nest!")(static_cast <bool> (!NewI->mayHaveSideEffects() &&
"Moving instructions with side-effects may change behavior of "
"the loop nest!") ? void (0) : __assert_fail ("!NewI->mayHaveSideEffects() && \"Moving instructions with side-effects may change behavior of \" \"the loop nest!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1332, __extension__
__PRETTY_FUNCTION__))
;
1333 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1334 Instruction *UserI = cast<Instruction>(U.getUser());
1335 if (!InnerLoop->contains(UserI->getParent()) ||
1336 UserI->getParent() == NewLatch ||
1337 llvm::is_contained(InductionPHIs, UserI))
1338 U.set(NewI);
1339 }
1340 // Add operands of moved instruction to the worklist, except if they are
1341 // outside the inner loop or are the induction PHI.
1342 for (Value *Op : WorkList[i]->operands()) {
1343 Instruction *OpI = dyn_cast<Instruction>(Op);
1344 if (!OpI ||
1345 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1346 llvm::is_contained(InductionPHIs, OpI))
1347 continue;
1348 WorkList.insert(OpI);
1349 }
1350 }
1351 };
1352
1353 // FIXME: Should we interchange when we have a constant condition?
1354 Instruction *CondI = dyn_cast<Instruction>(
1355 cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1356 ->getCondition());
1357 if (CondI)
1358 WorkList.insert(CondI);
1359 MoveInstructions();
1360 for (Instruction *InnerIndexVar : InnerIndexVarList)
1361 WorkList.insert(cast<Instruction>(InnerIndexVar));
1362 MoveInstructions();
1363 }
1364
1365 // Ensure the inner loop phi nodes have a separate basic block.
1366 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1367 if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) {
1368 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1369 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "splitting InnerLoopHeader done\n"
; } } while (false)
;
1370 }
1371
1372 // Instructions in the original inner loop preheader may depend on values
1373 // defined in the outer loop header. Move them there, because the original
1374 // inner loop preheader will become the entry into the interchanged loop nest.
1375 // Currently we move all instructions and rely on LICM to move invariant
1376 // instructions outside the loop nest.
1377 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1378 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1379 if (InnerLoopPreHeader != OuterLoopHeader) {
1380 SmallPtrSet<Instruction *, 4> NeedsMoving;
1381 for (Instruction &I :
1382 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1383 std::prev(InnerLoopPreHeader->end()))))
1384 I.moveBefore(OuterLoopHeader->getTerminator());
1385 }
1386
1387 Transformed |= adjustLoopLinks();
1388 if (!Transformed) {
1389 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopLinks failed\n"
; } } while (false)
;
1390 return false;
1391 }
1392
1393 return true;
1394}
1395
1396/// \brief Move all instructions except the terminator from FromBB right before
1397/// InsertBefore
1398static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1399 BasicBlock *ToBB = InsertBefore->getParent();
1400
1401 ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(),
1402 FromBB->getTerminator()->getIterator());
1403}
1404
1405/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1406static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1407 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1408 // from BB1 afterwards.
1409 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1410 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1411 for (Instruction *I : TempInstrs)
1412 I->removeFromParent();
1413
1414 // Move instructions from BB2 to BB1.
1415 moveBBContents(BB2, BB1->getTerminator());
1416
1417 // Move instructions from TempInstrs to BB2.
1418 for (Instruction *I : TempInstrs)
1419 I->insertBefore(BB2->getTerminator());
1420}
1421
1422// Update BI to jump to NewBB instead of OldBB. Records updates to the
1423// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1424// \p OldBB is exactly once in BI's successor list.
1425static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1426 BasicBlock *NewBB,
1427 std::vector<DominatorTree::UpdateType> &DTUpdates,
1428 bool MustUpdateOnce = true) {
1429 assert((!MustUpdateOnce ||(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1433, __extension__
__PRETTY_FUNCTION__))
1430 llvm::count_if(successors(BI),(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1433, __extension__
__PRETTY_FUNCTION__))
1431 [OldBB](BasicBlock *BB) {(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1433, __extension__
__PRETTY_FUNCTION__))
1432 return BB == OldBB;(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1433, __extension__
__PRETTY_FUNCTION__))
1433 }) == 1) && "BI must jump to OldBB exactly once.")(static_cast <bool> ((!MustUpdateOnce || llvm::count_if
(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB
; }) == 1) && "BI must jump to OldBB exactly once.") ?
void (0) : __assert_fail ("(!MustUpdateOnce || llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) == 1) && \"BI must jump to OldBB exactly once.\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1433, __extension__
__PRETTY_FUNCTION__))
;
1434 bool Changed = false;
1435 for (Use &Op : BI->operands())
1436 if (Op == OldBB) {
1437 Op.set(NewBB);
1438 Changed = true;
1439 }
1440
1441 if (Changed) {
1442 DTUpdates.push_back(
1443 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1444 DTUpdates.push_back(
1445 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1446 }
1447 assert(Changed && "Expected a successor to be updated")(static_cast <bool> (Changed && "Expected a successor to be updated"
) ? void (0) : __assert_fail ("Changed && \"Expected a successor to be updated\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1447, __extension__
__PRETTY_FUNCTION__))
;
1448}
1449
1450// Move Lcssa PHIs to the right place.
1451static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1452 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1453 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1454 Loop *InnerLoop, LoopInfo *LI) {
1455
1456 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1457 // defined either in the header or latch. Those blocks will become header and
1458 // latch of the new outer loop, and the only possible users can PHI nodes
1459 // in the exit block of the loop nest or the outer loop header (reduction
1460 // PHIs, in that case, the incoming value must be defined in the inner loop
1461 // header). We can just substitute the user with the incoming value and remove
1462 // the PHI.
1463 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1464 assert(P.getNumIncomingValues() == 1 &&(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1465, __extension__
__PRETTY_FUNCTION__))
1465 "Only loops with a single exit are supported!")(static_cast <bool> (P.getNumIncomingValues() == 1 &&
"Only loops with a single exit are supported!") ? void (0) :
__assert_fail ("P.getNumIncomingValues() == 1 && \"Only loops with a single exit are supported!\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1465, __extension__
__PRETTY_FUNCTION__))
;
1466
1467 // Incoming values are guaranteed be instructions currently.
1468 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1469 // In case of multi-level nested loops, follow LCSSA to find the incoming
1470 // value defined from the innermost loop.
1471 auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1472 // Skip phis with incoming values from the inner loop body, excluding the
1473 // header and latch.
1474 if (IncIInnerMost->getParent() != InnerLatch &&
1475 IncIInnerMost->getParent() != InnerHeader)
1476 continue;
1477
1478 assert(all_of(P.users(),(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1486, __extension__
__PRETTY_FUNCTION__))
1479 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1486, __extension__
__PRETTY_FUNCTION__))
1480 return (cast<PHINode>(U)->getParent() == OuterHeader &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1486, __extension__
__PRETTY_FUNCTION__))
1481 IncI->getParent() == InnerHeader) ||(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1486, __extension__
__PRETTY_FUNCTION__))
1482 cast<PHINode>(U)->getParent() == OuterExit;(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1486, __extension__
__PRETTY_FUNCTION__))
1483 }) &&(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1486, __extension__
__PRETTY_FUNCTION__))
1484 "Can only replace phis iff the uses are in the loop nest exit or "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1486, __extension__
__PRETTY_FUNCTION__))
1485 "the incoming value is defined in the inner header (it will "(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1486, __extension__
__PRETTY_FUNCTION__))
1486 "dominate all loop blocks after interchanging)")(static_cast <bool> (all_of(P.users(), [OuterHeader, OuterExit
, IncI, InnerHeader](User *U) { return (cast<PHINode>(U
)->getParent() == OuterHeader && IncI->getParent
() == InnerHeader) || cast<PHINode>(U)->getParent() ==
OuterExit; }) && "Can only replace phis iff the uses are in the loop nest exit or "
"the incoming value is defined in the inner header (it will "
"dominate all loop blocks after interchanging)") ? void (0) :
__assert_fail ("all_of(P.users(), [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { return (cast<PHINode>(U)->getParent() == OuterHeader && IncI->getParent() == InnerHeader) || cast<PHINode>(U)->getParent() == OuterExit; }) && \"Can only replace phis iff the uses are in the loop nest exit or \" \"the incoming value is defined in the inner header (it will \" \"dominate all loop blocks after interchanging)\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1486, __extension__
__PRETTY_FUNCTION__))
;
1487 P.replaceAllUsesWith(IncI);
1488 P.eraseFromParent();
1489 }
1490
1491 SmallVector<PHINode *, 8> LcssaInnerExit;
1492 for (PHINode &P : InnerExit->phis())
1493 LcssaInnerExit.push_back(&P);
1494
1495 SmallVector<PHINode *, 8> LcssaInnerLatch;
1496 for (PHINode &P : InnerLatch->phis())
1497 LcssaInnerLatch.push_back(&P);
1498
1499 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1500 // If a PHI node has users outside of InnerExit, it has a use outside the
1501 // interchanged loop and we have to preserve it. We move these to
1502 // InnerLatch, which will become the new exit block for the innermost
1503 // loop after interchanging.
1504 for (PHINode *P : LcssaInnerExit)
1
Assuming '__begin1' is equal to '__end1'
1505 P->moveBefore(InnerLatch->getFirstNonPHI());
1506
1507 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1508 // and we have to move them to the new inner latch.
1509 for (PHINode *P : LcssaInnerLatch)
2
Assuming '__begin1' is equal to '__end1'
1510 P->moveBefore(InnerExit->getFirstNonPHI());
1511
1512 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1513 // incoming values defined in the outer loop, we have to add a new PHI
1514 // in the inner loop latch, which became the exit block of the outer loop,
1515 // after interchanging.
1516 if (OuterExit) {
3
Assuming 'OuterExit' is non-null
4
Taking true branch
1517 for (PHINode &P : OuterExit->phis()) {
1518 if (P.getNumIncomingValues() != 1)
5
Assuming the condition is false
6
Taking false branch
1519 continue;
1520 // Skip Phis with incoming values defined in the inner loop. Those should
1521 // already have been updated.
1522 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
7
Assuming the object is a 'CastReturnType'
1523 if (!I
7.1
'I' is non-null
|| LI->getLoopFor(I->getParent()) == InnerLoop)
8
Assuming the condition is false
9
Taking false branch
1524 continue;
1525
1526 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
10
Assuming the object is not a 'CastReturnType'
11
'NewPhi' initialized to a null pointer value
1527 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
12
Called C++ object pointer is null
1528 NewPhi->setIncomingBlock(0, OuterLatch);
1529 // We might have incoming edges from other BBs, i.e., the original outer
1530 // header.
1531 for (auto *Pred : predecessors(InnerLatch)) {
1532 if (Pred == OuterLatch)
1533 continue;
1534 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1535 }
1536 NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
1537 P.setIncomingValue(0, NewPhi);
1538 }
1539 }
1540
1541 // Now adjust the incoming blocks for the LCSSA PHIs.
1542 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1543 // with the new latch.
1544 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1545}
1546
1547bool LoopInterchangeTransform::adjustLoopBranches() {
1548 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "adjustLoopBranches called\n"
; } } while (false)
;
1549 std::vector<DominatorTree::UpdateType> DTUpdates;
1550
1551 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1552 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1553
1554 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1556, __extension__
__PRETTY_FUNCTION__))
1555 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1556, __extension__
__PRETTY_FUNCTION__))
1556 InnerLoopPreHeader && "Guaranteed by loop-simplify form")(static_cast <bool> (OuterLoopPreHeader != OuterLoop->
getHeader() && InnerLoopPreHeader != InnerLoop->getHeader
() && OuterLoopPreHeader && InnerLoopPreHeader
&& "Guaranteed by loop-simplify form") ? void (0) : __assert_fail
("OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && \"Guaranteed by loop-simplify form\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1556, __extension__
__PRETTY_FUNCTION__))
;
1557 // Ensure that both preheaders do not contain PHI nodes and have single
1558 // predecessors. This allows us to move them easily. We use
1559 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1560 // preheaders do not satisfy those conditions.
1561 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1562 !OuterLoopPreHeader->getUniquePredecessor())
1563 OuterLoopPreHeader =
1564 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1565 if (InnerLoopPreHeader == OuterLoop->getHeader())
1566 InnerLoopPreHeader =
1567 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1568
1569 // Adjust the loop preheader
1570 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1571 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1572 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1573 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1574 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1575 BasicBlock *InnerLoopLatchPredecessor =
1576 InnerLoopLatch->getUniquePredecessor();
1577 BasicBlock *InnerLoopLatchSuccessor;
1578 BasicBlock *OuterLoopLatchSuccessor;
1579
1580 BranchInst *OuterLoopLatchBI =
1581 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1582 BranchInst *InnerLoopLatchBI =
1583 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1584 BranchInst *OuterLoopHeaderBI =
1585 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1586 BranchInst *InnerLoopHeaderBI =
1587 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1588
1589 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1590 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1591 !InnerLoopHeaderBI)
1592 return false;
1593
1594 BranchInst *InnerLoopLatchPredecessorBI =
1595 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1596 BranchInst *OuterLoopPredecessorBI =
1597 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1598
1599 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1600 return false;
1601 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1602 if (!InnerLoopHeaderSuccessor)
1603 return false;
1604
1605 // Adjust Loop Preheader and headers.
1606 // The branches in the outer loop predecessor and the outer loop header can
1607 // be unconditional branches or conditional branches with duplicates. Consider
1608 // this when updating the successors.
1609 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1610 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1611 // The outer loop header might or might not branch to the outer latch.
1612 // We are guaranteed to branch to the inner loop preheader.
1613 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1614 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1615 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1616 DTUpdates,
1617 /*MustUpdateOnce=*/false);
1618 }
1619 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1620 InnerLoopHeaderSuccessor, DTUpdates,
1621 /*MustUpdateOnce=*/false);
1622
1623 // Adjust reduction PHI's now that the incoming block has changed.
1624 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1625 OuterLoopHeader);
1626
1627 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1628 OuterLoopPreHeader, DTUpdates);
1629
1630 // -------------Adjust loop latches-----------
1631 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1632 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1633 else
1634 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1635
1636 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1637 InnerLoopLatchSuccessor, DTUpdates);
1638
1639 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1640 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1641 else
1642 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1643
1644 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1645 OuterLoopLatchSuccessor, DTUpdates);
1646 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1647 DTUpdates);
1648
1649 DT->applyUpdates(DTUpdates);
1650 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1651 OuterLoopPreHeader);
1652
1653 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1654 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1655 InnerLoop, LI);
1656 // For PHIs in the exit block of the outer loop, outer's latch has been
1657 // replaced by Inners'.
1658 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1659
1660 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1661 // Now update the reduction PHIs in the inner and outer loop headers.
1662 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1663 for (PHINode &PHI : InnerLoopHeader->phis())
1664 if (OuterInnerReductions.contains(&PHI))
1665 InnerLoopPHIs.push_back(&PHI);
1666
1667 for (PHINode &PHI : OuterLoopHeader->phis())
1668 if (OuterInnerReductions.contains(&PHI))
1669 OuterLoopPHIs.push_back(&PHI);
1670
1671 // Now move the remaining reduction PHIs from outer to inner loop header and
1672 // vice versa. The PHI nodes must be part of a reduction across the inner and
1673 // outer loop and all the remains to do is and updating the incoming blocks.
1674 for (PHINode *PHI : OuterLoopPHIs) {
1675 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Outer loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1676 PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1677 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1677, __extension__
__PRETTY_FUNCTION__))
;
1678 }
1679 for (PHINode *PHI : InnerLoopPHIs) {
1680 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-interchange")) { dbgs() << "Inner loop reduction PHIs:\n"
; PHI->dump();; } } while (false)
;
1681 PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1682 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")(static_cast <bool> (OuterInnerReductions.count(PHI) &&
"Expected a reduction PHI node") ? void (0) : __assert_fail (
"OuterInnerReductions.count(PHI) && \"Expected a reduction PHI node\""
, "llvm/lib/Transforms/Scalar/LoopInterchange.cpp", 1682, __extension__
__PRETTY_FUNCTION__))
;
1683 }
1684
1685 // Update the incoming blocks for moved PHI nodes.
1686 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
1687 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
1688 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
1689 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1690
1691 // Values defined in the outer loop header could be used in the inner loop
1692 // latch. In that case, we need to create LCSSA phis for them, because after
1693 // interchanging they will be defined in the new inner loop and used in the
1694 // new outer loop.
1695 IRBuilder<> Builder(OuterLoopHeader->getContext());
1696 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1697 for (Instruction &I :
1698 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1699 MayNeedLCSSAPhis.push_back(&I);
1700 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
1701
1702 return true;
1703}
1704
1705bool LoopInterchangeTransform::adjustLoopLinks() {
1706 // Adjust all branches in the inner and outer loop.
1707 bool Changed = adjustLoopBranches();
1708 if (Changed) {
1709 // We have interchanged the preheaders so we need to interchange the data in
1710 // the preheaders as well. This is because the content of the inner
1711 // preheader was previously executed inside the outer loop.
1712 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1713 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1714 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
1715 }
1716 return Changed;
1717}
1718
1719PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1720 LoopAnalysisManager &AM,
1721 LoopStandardAnalysisResults &AR,
1722 LPMUpdater &U) {
1723 Function &F = *LN.getParent();
1724
1725 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1726 std::unique_ptr<CacheCost> CC =
1727 CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
1728 OptimizationRemarkEmitter ORE(&F);
1729 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
1730 return PreservedAnalyses::all();
1731 U.markLoopNestChanged(true);
1732 return getLoopPassPreservedAnalyses();
1733}