File: | lib/Transforms/Scalar/LoopInterchange.cpp |
Warning: | line 1037, column 5 Value stored to 'InnerLoopPreHeader' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- LoopInterchange.cpp - Loop interchange pass-------------------------===// |
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | // |
10 | // This Pass handles loop interchange transform. |
11 | // This pass interchanges loops to provide a more cache-friendly memory access |
12 | // patterns. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "llvm/ADT/STLExtras.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/ADT/Statistic.h" |
19 | #include "llvm/ADT/StringRef.h" |
20 | #include "llvm/Analysis/AliasAnalysis.h" |
21 | #include "llvm/Analysis/DependenceAnalysis.h" |
22 | #include "llvm/Analysis/LoopInfo.h" |
23 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
24 | #include "llvm/Analysis/ScalarEvolution.h" |
25 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
26 | #include "llvm/IR/BasicBlock.h" |
27 | #include "llvm/IR/Constants.h" |
28 | #include "llvm/IR/DiagnosticInfo.h" |
29 | #include "llvm/IR/Dominators.h" |
30 | #include "llvm/IR/Function.h" |
31 | #include "llvm/IR/InstrTypes.h" |
32 | #include "llvm/IR/Instruction.h" |
33 | #include "llvm/IR/Instructions.h" |
34 | #include "llvm/IR/Type.h" |
35 | #include "llvm/IR/User.h" |
36 | #include "llvm/IR/Value.h" |
37 | #include "llvm/Pass.h" |
38 | #include "llvm/Support/Casting.h" |
39 | #include "llvm/Support/CommandLine.h" |
40 | #include "llvm/Support/Debug.h" |
41 | #include "llvm/Support/ErrorHandling.h" |
42 | #include "llvm/Support/raw_ostream.h" |
43 | #include "llvm/Transforms/Scalar.h" |
44 | #include "llvm/Transforms/Utils.h" |
45 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
46 | #include "llvm/Transforms/Utils/LoopUtils.h" |
47 | #include <cassert> |
48 | #include <utility> |
49 | #include <vector> |
50 | |
51 | using namespace llvm; |
52 | |
53 | #define DEBUG_TYPE"loop-interchange" "loop-interchange" |
54 | |
55 | STATISTIC(LoopsInterchanged, "Number of loops interchanged")static llvm::Statistic LoopsInterchanged = {"loop-interchange" , "LoopsInterchanged", "Number of loops interchanged", {0}, { false}}; |
56 | |
57 | static cl::opt<int> LoopInterchangeCostThreshold( |
58 | "loop-interchange-threshold", cl::init(0), cl::Hidden, |
59 | cl::desc("Interchange if you gain more than this number")); |
60 | |
61 | namespace { |
62 | |
63 | using LoopVector = SmallVector<Loop *, 8>; |
64 | |
65 | // TODO: Check if we can use a sparse matrix here. |
66 | using CharMatrix = std::vector<std::vector<char>>; |
67 | |
68 | } // end anonymous namespace |
69 | |
70 | // Maximum number of dependencies that can be handled in the dependency matrix. |
71 | static const unsigned MaxMemInstrCount = 100; |
72 | |
73 | // Maximum loop depth supported. |
74 | static const unsigned MaxLoopNestDepth = 10; |
75 | |
76 | #ifdef DUMP_DEP_MATRICIES |
77 | static void printDepMatrix(CharMatrix &DepMatrix) { |
78 | for (auto &Row : DepMatrix) { |
79 | for (auto D : Row) |
80 | LLVM_DEBUG(dbgs() << D << " ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << D << " "; } } while (false); |
81 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "\n"; } } while (false ); |
82 | } |
83 | } |
84 | #endif |
85 | |
86 | static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, |
87 | Loop *L, DependenceInfo *DI) { |
88 | using ValueVector = SmallVector<Value *, 16>; |
89 | |
90 | ValueVector MemInstr; |
91 | |
92 | // For each block. |
93 | for (BasicBlock *BB : L->blocks()) { |
94 | // Scan the BB and collect legal loads and stores. |
95 | for (Instruction &I : *BB) { |
96 | if (!isa<Instruction>(I)) |
97 | return false; |
98 | if (auto *Ld = dyn_cast<LoadInst>(&I)) { |
99 | if (!Ld->isSimple()) |
100 | return false; |
101 | MemInstr.push_back(&I); |
102 | } else if (auto *St = dyn_cast<StoreInst>(&I)) { |
103 | if (!St->isSimple()) |
104 | return false; |
105 | MemInstr.push_back(&I); |
106 | } |
107 | } |
108 | } |
109 | |
110 | LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Found " << MemInstr .size() << " Loads and Stores to analyze\n"; } } while ( false) |
111 | << " Loads and Stores to analyze\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Found " << MemInstr .size() << " Loads and Stores to analyze\n"; } } while ( false); |
112 | |
113 | ValueVector::iterator I, IE, J, JE; |
114 | |
115 | for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { |
116 | for (J = I, JE = MemInstr.end(); J != JE; ++J) { |
117 | std::vector<char> Dep; |
118 | Instruction *Src = cast<Instruction>(*I); |
119 | Instruction *Dst = cast<Instruction>(*J); |
120 | if (Src == Dst) |
121 | continue; |
122 | // Ignore Input dependencies. |
123 | if (isa<LoadInst>(Src) && isa<LoadInst>(Dst)) |
124 | continue; |
125 | // Track Output, Flow, and Anti dependencies. |
126 | if (auto D = DI->depends(Src, Dst, true)) { |
127 | assert(D->isOrdered() && "Expected an output, flow or anti dep.")(static_cast <bool> (D->isOrdered() && "Expected an output, flow or anti dep." ) ? void (0) : __assert_fail ("D->isOrdered() && \"Expected an output, flow or anti dep.\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Scalar/LoopInterchange.cpp" , 127, __extension__ __PRETTY_FUNCTION__)); |
128 | LLVM_DEBUG(StringRef DepType =do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; dbgs() << "Found " << DepType << " dependency between Src and Dst\n" << " Src:" << *Src << "\n Dst:" << * Dst << '\n'; } } while (false) |
129 | D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; dbgs() << "Found " << DepType << " dependency between Src and Dst\n" << " Src:" << *Src << "\n Dst:" << * Dst << '\n'; } } while (false) |
130 | dbgs() << "Found " << DepTypedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; dbgs() << "Found " << DepType << " dependency between Src and Dst\n" << " Src:" << *Src << "\n Dst:" << * Dst << '\n'; } } while (false) |
131 | << " dependency between Src and Dst\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; dbgs() << "Found " << DepType << " dependency between Src and Dst\n" << " Src:" << *Src << "\n Dst:" << * Dst << '\n'; } } while (false) |
132 | << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; dbgs() << "Found " << DepType << " dependency between Src and Dst\n" << " Src:" << *Src << "\n Dst:" << * Dst << '\n'; } } while (false); |
133 | unsigned Levels = D->getLevels(); |
134 | char Direction; |
135 | for (unsigned II = 1; II <= Levels; ++II) { |
136 | const SCEV *Distance = D->getDistance(II); |
137 | const SCEVConstant *SCEVConst = |
138 | dyn_cast_or_null<SCEVConstant>(Distance); |
139 | if (SCEVConst) { |
140 | const ConstantInt *CI = SCEVConst->getValue(); |
141 | if (CI->isNegative()) |
142 | Direction = '<'; |
143 | else if (CI->isZero()) |
144 | Direction = '='; |
145 | else |
146 | Direction = '>'; |
147 | Dep.push_back(Direction); |
148 | } else if (D->isScalar(II)) { |
149 | Direction = 'S'; |
150 | Dep.push_back(Direction); |
151 | } else { |
152 | unsigned Dir = D->getDirection(II); |
153 | if (Dir == Dependence::DVEntry::LT || |
154 | Dir == Dependence::DVEntry::LE) |
155 | Direction = '<'; |
156 | else if (Dir == Dependence::DVEntry::GT || |
157 | Dir == Dependence::DVEntry::GE) |
158 | Direction = '>'; |
159 | else if (Dir == Dependence::DVEntry::EQ) |
160 | Direction = '='; |
161 | else |
162 | Direction = '*'; |
163 | Dep.push_back(Direction); |
164 | } |
165 | } |
166 | while (Dep.size() != Level) { |
167 | Dep.push_back('I'); |
168 | } |
169 | |
170 | DepMatrix.push_back(Dep); |
171 | if (DepMatrix.size() > MaxMemInstrCount) { |
172 | LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Cannot handle more than " << MaxMemInstrCount << " dependencies inside loop\n" ; } } while (false) |
173 | << " dependencies inside loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Cannot handle more than " << MaxMemInstrCount << " dependencies inside loop\n" ; } } while (false); |
174 | return false; |
175 | } |
176 | } |
177 | } |
178 | } |
179 | |
180 | return true; |
181 | } |
182 | |
183 | // A loop is moved from index 'from' to an index 'to'. Update the Dependence |
184 | // matrix by exchanging the two columns. |
185 | static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, |
186 | unsigned ToIndx) { |
187 | unsigned numRows = DepMatrix.size(); |
188 | for (unsigned i = 0; i < numRows; ++i) { |
189 | char TmpVal = DepMatrix[i][ToIndx]; |
190 | DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx]; |
191 | DepMatrix[i][FromIndx] = TmpVal; |
192 | } |
193 | } |
194 | |
195 | // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is |
196 | // '>' |
197 | static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, |
198 | unsigned Column) { |
199 | for (unsigned i = 0; i <= Column; ++i) { |
200 | if (DepMatrix[Row][i] == '<') |
201 | return false; |
202 | if (DepMatrix[Row][i] == '>') |
203 | return true; |
204 | } |
205 | // All dependencies were '=','S' or 'I' |
206 | return false; |
207 | } |
208 | |
209 | // Checks if no dependence exist in the dependency matrix in Row before Column. |
210 | static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, |
211 | unsigned Column) { |
212 | for (unsigned i = 0; i < Column; ++i) { |
213 | if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' && |
214 | DepMatrix[Row][i] != 'I') |
215 | return false; |
216 | } |
217 | return true; |
218 | } |
219 | |
220 | static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, |
221 | unsigned OuterLoopId, char InnerDep, |
222 | char OuterDep) { |
223 | if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId)) |
224 | return false; |
225 | |
226 | if (InnerDep == OuterDep) |
227 | return true; |
228 | |
229 | // It is legal to interchange if and only if after interchange no row has a |
230 | // '>' direction as the leftmost non-'='. |
231 | |
232 | if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I') |
233 | return true; |
234 | |
235 | if (InnerDep == '<') |
236 | return true; |
237 | |
238 | if (InnerDep == '>') { |
239 | // If OuterLoopId represents outermost loop then interchanging will make the |
240 | // 1st dependency as '>' |
241 | if (OuterLoopId == 0) |
242 | return false; |
243 | |
244 | // If all dependencies before OuterloopId are '=','S'or 'I'. Then |
245 | // interchanging will result in this row having an outermost non '=' |
246 | // dependency of '>' |
247 | if (!containsNoDependence(DepMatrix, Row, OuterLoopId)) |
248 | return true; |
249 | } |
250 | |
251 | return false; |
252 | } |
253 | |
254 | // Checks if it is legal to interchange 2 loops. |
255 | // [Theorem] A permutation of the loops in a perfect nest is legal if and only |
256 | // if the direction matrix, after the same permutation is applied to its |
257 | // columns, has no ">" direction as the leftmost non-"=" direction in any row. |
258 | static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, |
259 | unsigned InnerLoopId, |
260 | unsigned OuterLoopId) { |
261 | unsigned NumRows = DepMatrix.size(); |
262 | // For each row check if it is valid to interchange. |
263 | for (unsigned Row = 0; Row < NumRows; ++Row) { |
264 | char InnerDep = DepMatrix[Row][InnerLoopId]; |
265 | char OuterDep = DepMatrix[Row][OuterLoopId]; |
266 | if (InnerDep == '*' || OuterDep == '*') |
267 | return false; |
268 | if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep)) |
269 | return false; |
270 | } |
271 | return true; |
272 | } |
273 | |
274 | static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) { |
275 | LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: " << L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getName() << '\n' ; } } while (false) |
276 | << L.getHeader()->getParent()->getName() << " Loop: %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: " << L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getName() << '\n' ; } } while (false) |
277 | << L.getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: " << L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getName() << '\n' ; } } while (false); |
278 | LoopVector LoopList; |
279 | Loop *CurrentLoop = &L; |
280 | const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); |
281 | while (!Vec->empty()) { |
282 | // The current loop has multiple subloops in it hence it is not tightly |
283 | // nested. |
284 | // Discard all loops above it added into Worklist. |
285 | if (Vec->size() != 1) { |
286 | LoopList.clear(); |
287 | return; |
288 | } |
289 | LoopList.push_back(CurrentLoop); |
290 | CurrentLoop = Vec->front(); |
291 | Vec = &CurrentLoop->getSubLoops(); |
292 | } |
293 | LoopList.push_back(CurrentLoop); |
294 | V.push_back(std::move(LoopList)); |
295 | } |
296 | |
297 | static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { |
298 | PHINode *InnerIndexVar = L->getCanonicalInductionVariable(); |
299 | if (InnerIndexVar) |
300 | return InnerIndexVar; |
301 | if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr) |
302 | return nullptr; |
303 | for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { |
304 | PHINode *PhiVar = cast<PHINode>(I); |
305 | Type *PhiTy = PhiVar->getType(); |
306 | if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && |
307 | !PhiTy->isPointerTy()) |
308 | return nullptr; |
309 | const SCEVAddRecExpr *AddRec = |
310 | dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar)); |
311 | if (!AddRec || !AddRec->isAffine()) |
312 | continue; |
313 | const SCEV *Step = AddRec->getStepRecurrence(*SE); |
314 | if (!isa<SCEVConstant>(Step)) |
315 | continue; |
316 | // Found the induction variable. |
317 | // FIXME: Handle loops with more than one induction variable. Note that, |
318 | // currently, legality makes sure we have only one induction variable. |
319 | return PhiVar; |
320 | } |
321 | return nullptr; |
322 | } |
323 | |
324 | namespace { |
325 | |
326 | /// LoopInterchangeLegality checks if it is legal to interchange the loop. |
327 | class LoopInterchangeLegality { |
328 | public: |
329 | LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, |
330 | LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA, |
331 | OptimizationRemarkEmitter *ORE) |
332 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), |
333 | PreserveLCSSA(PreserveLCSSA), ORE(ORE) {} |
334 | |
335 | /// Check if the loops can be interchanged. |
336 | bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, |
337 | CharMatrix &DepMatrix); |
338 | |
339 | /// Check if the loop structure is understood. We do not handle triangular |
340 | /// loops for now. |
341 | bool isLoopStructureUnderstood(PHINode *InnerInductionVar); |
342 | |
343 | bool currentLimitations(); |
344 | |
345 | bool hasInnerLoopReduction() { return InnerLoopHasReduction; } |
346 | |
347 | private: |
348 | bool tightlyNested(Loop *Outer, Loop *Inner); |
349 | bool containsUnsafeInstructionsInHeader(BasicBlock *BB); |
350 | bool areAllUsesReductions(Instruction *Ins, Loop *L); |
351 | bool containsUnsafeInstructionsInLatch(BasicBlock *BB); |
352 | bool findInductionAndReductions(Loop *L, |
353 | SmallVector<PHINode *, 8> &Inductions, |
354 | SmallVector<PHINode *, 8> &Reductions); |
355 | |
356 | Loop *OuterLoop; |
357 | Loop *InnerLoop; |
358 | |
359 | ScalarEvolution *SE; |
360 | LoopInfo *LI; |
361 | DominatorTree *DT; |
362 | bool PreserveLCSSA; |
363 | |
364 | /// Interface to emit optimization remarks. |
365 | OptimizationRemarkEmitter *ORE; |
366 | |
367 | bool InnerLoopHasReduction = false; |
368 | }; |
369 | |
370 | /// LoopInterchangeProfitability checks if it is profitable to interchange the |
371 | /// loop. |
372 | class LoopInterchangeProfitability { |
373 | public: |
374 | LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE, |
375 | OptimizationRemarkEmitter *ORE) |
376 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} |
377 | |
378 | /// Check if the loop interchange is profitable. |
379 | bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, |
380 | CharMatrix &DepMatrix); |
381 | |
382 | private: |
383 | int getInstrOrderCost(); |
384 | |
385 | Loop *OuterLoop; |
386 | Loop *InnerLoop; |
387 | |
388 | /// Scev analysis. |
389 | ScalarEvolution *SE; |
390 | |
391 | /// Interface to emit optimization remarks. |
392 | OptimizationRemarkEmitter *ORE; |
393 | }; |
394 | |
395 | /// LoopInterchangeTransform interchanges the loop. |
396 | class LoopInterchangeTransform { |
397 | public: |
398 | LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, |
399 | LoopInfo *LI, DominatorTree *DT, |
400 | BasicBlock *LoopNestExit, |
401 | bool InnerLoopContainsReductions) |
402 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), |
403 | LoopExit(LoopNestExit), |
404 | InnerLoopHasReduction(InnerLoopContainsReductions) {} |
405 | |
406 | /// Interchange OuterLoop and InnerLoop. |
407 | bool transform(); |
408 | void restructureLoops(Loop *NewInner, Loop *NewOuter, |
409 | BasicBlock *OrigInnerPreHeader, |
410 | BasicBlock *OrigOuterPreHeader); |
411 | void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); |
412 | |
413 | private: |
414 | void splitInnerLoopLatch(Instruction *); |
415 | void splitInnerLoopHeader(); |
416 | bool adjustLoopLinks(); |
417 | void adjustLoopPreheaders(); |
418 | bool adjustLoopBranches(); |
419 | void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred, |
420 | BasicBlock *NewPred); |
421 | |
422 | Loop *OuterLoop; |
423 | Loop *InnerLoop; |
424 | |
425 | /// Scev analysis. |
426 | ScalarEvolution *SE; |
427 | |
428 | LoopInfo *LI; |
429 | DominatorTree *DT; |
430 | BasicBlock *LoopExit; |
431 | bool InnerLoopHasReduction; |
432 | }; |
433 | |
434 | // Main LoopInterchange Pass. |
435 | struct LoopInterchange : public FunctionPass { |
436 | static char ID; |
437 | ScalarEvolution *SE = nullptr; |
438 | LoopInfo *LI = nullptr; |
439 | DependenceInfo *DI = nullptr; |
440 | DominatorTree *DT = nullptr; |
441 | bool PreserveLCSSA; |
442 | |
443 | /// Interface to emit optimization remarks. |
444 | OptimizationRemarkEmitter *ORE; |
445 | |
446 | LoopInterchange() : FunctionPass(ID) { |
447 | initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); |
448 | } |
449 | |
450 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
451 | AU.addRequired<ScalarEvolutionWrapperPass>(); |
452 | AU.addRequired<AAResultsWrapperPass>(); |
453 | AU.addRequired<DominatorTreeWrapperPass>(); |
454 | AU.addRequired<LoopInfoWrapperPass>(); |
455 | AU.addRequired<DependenceAnalysisWrapperPass>(); |
456 | AU.addRequiredID(LoopSimplifyID); |
457 | AU.addRequiredID(LCSSAID); |
458 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
459 | |
460 | AU.addPreserved<DominatorTreeWrapperPass>(); |
461 | AU.addPreserved<LoopInfoWrapperPass>(); |
462 | } |
463 | |
464 | bool runOnFunction(Function &F) override { |
465 | if (skipFunction(F)) |
466 | return false; |
467 | |
468 | SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); |
469 | LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
470 | DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI(); |
471 | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
472 | ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); |
473 | PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); |
474 | |
475 | // Build up a worklist of loop pairs to analyze. |
476 | SmallVector<LoopVector, 8> Worklist; |
477 | |
478 | for (Loop *L : *LI) |
479 | populateWorklist(*L, Worklist); |
480 | |
481 | LLVM_DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Worklist size = " << Worklist.size() << "\n"; } } while (false); |
482 | bool Changed = true; |
483 | while (!Worklist.empty()) { |
484 | LoopVector LoopList = Worklist.pop_back_val(); |
485 | Changed = processLoopList(LoopList, F); |
486 | } |
487 | return Changed; |
488 | } |
489 | |
490 | bool isComputableLoopNest(LoopVector LoopList) { |
491 | for (Loop *L : LoopList) { |
492 | const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); |
493 | if (ExitCountOuter == SE->getCouldNotCompute()) { |
494 | LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Couldn't compute backedge count\n" ; } } while (false); |
495 | return false; |
496 | } |
497 | if (L->getNumBackEdges() != 1) { |
498 | LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "NumBackEdges is not equal to 1\n" ; } } while (false); |
499 | return false; |
500 | } |
501 | if (!L->getExitingBlock()) { |
502 | LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loop doesn't have unique exit block\n" ; } } while (false); |
503 | return false; |
504 | } |
505 | } |
506 | return true; |
507 | } |
508 | |
509 | unsigned selectLoopForInterchange(const LoopVector &LoopList) { |
510 | // TODO: Add a better heuristic to select the loop to be interchanged based |
511 | // on the dependence matrix. Currently we select the innermost loop. |
512 | return LoopList.size() - 1; |
513 | } |
514 | |
515 | bool processLoopList(LoopVector LoopList, Function &F) { |
516 | bool Changed = false; |
517 | unsigned LoopNestDepth = LoopList.size(); |
518 | if (LoopNestDepth < 2) { |
519 | LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loop doesn't contain minimum nesting level.\n" ; } } while (false); |
520 | return false; |
521 | } |
522 | if (LoopNestDepth > MaxLoopNestDepth) { |
523 | LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than " << MaxLoopNestDepth << "\n"; } } while (false) |
524 | << MaxLoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than " << MaxLoopNestDepth << "\n"; } } while (false); |
525 | return false; |
526 | } |
527 | if (!isComputableLoopNest(LoopList)) { |
528 | LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Not valid loop candidate for interchange\n" ; } } while (false); |
529 | return false; |
530 | } |
531 | |
532 | LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepthdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Processing LoopList of size = " << LoopNestDepth << "\n"; } } while (false) |
533 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Processing LoopList of size = " << LoopNestDepth << "\n"; } } while (false); |
534 | |
535 | CharMatrix DependencyMatrix; |
536 | Loop *OuterMostLoop = *(LoopList.begin()); |
537 | if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth, |
538 | OuterMostLoop, DI)) { |
539 | LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Populating dependency matrix failed\n" ; } } while (false); |
540 | return false; |
541 | } |
542 | #ifdef DUMP_DEP_MATRICIES |
543 | LLVM_DEBUG(dbgs() << "Dependence before interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Dependence before interchange\n" ; } } while (false); |
544 | printDepMatrix(DependencyMatrix); |
545 | #endif |
546 | |
547 | // Get the Outermost loop exit. |
548 | BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock(); |
549 | if (!LoopNestExit) { |
550 | LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "OuterMostLoop needs an unique exit block" ; } } while (false); |
551 | return false; |
552 | } |
553 | |
554 | unsigned SelecLoopId = selectLoopForInterchange(LoopList); |
555 | // Move the selected loop outwards to the best possible position. |
556 | for (unsigned i = SelecLoopId; i > 0; i--) { |
557 | bool Interchanged = |
558 | processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix); |
559 | if (!Interchanged) |
560 | return Changed; |
561 | // Loops interchanged reflect the same in LoopList |
562 | std::swap(LoopList[i - 1], LoopList[i]); |
563 | |
564 | // Update the DependencyMatrix |
565 | interChangeDependencies(DependencyMatrix, i, i - 1); |
566 | #ifdef DUMP_DEP_MATRICIES |
567 | LLVM_DEBUG(dbgs() << "Dependence after interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Dependence after interchange\n" ; } } while (false); |
568 | printDepMatrix(DependencyMatrix); |
569 | #endif |
570 | Changed |= Interchanged; |
571 | } |
572 | return Changed; |
573 | } |
574 | |
575 | bool processLoop(LoopVector LoopList, unsigned InnerLoopId, |
576 | unsigned OuterLoopId, BasicBlock *LoopNestExit, |
577 | std::vector<std::vector<char>> &DependencyMatrix) { |
578 | LLVM_DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Processing Inner Loop Id = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"; } } while (false) |
579 | << " and OuterLoopId = " << OuterLoopId << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Processing Inner Loop Id = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"; } } while (false); |
580 | Loop *InnerLoop = LoopList[InnerLoopId]; |
581 | Loop *OuterLoop = LoopList[OuterLoopId]; |
582 | |
583 | LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT, |
584 | PreserveLCSSA, ORE); |
585 | if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { |
586 | LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Not interchanging loops. Cannot prove legality.\n" ; } } while (false); |
587 | return false; |
588 | } |
589 | LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops are legal to interchange\n" ; } } while (false); |
590 | LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); |
591 | if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { |
592 | LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Interchanging loops not profitable.\n" ; } } while (false); |
593 | return false; |
594 | } |
595 | |
596 | ORE->emit([&]() { |
597 | return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged", |
598 | InnerLoop->getStartLoc(), |
599 | InnerLoop->getHeader()) |
600 | << "Loop interchanged with enclosing loop."; |
601 | }); |
602 | |
603 | LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, |
604 | LoopNestExit, LIL.hasInnerLoopReduction()); |
605 | LIT.transform(); |
606 | LLVM_DEBUG(dbgs() << "Loops interchanged.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops interchanged.\n" ; } } while (false); |
607 | LoopsInterchanged++; |
608 | return true; |
609 | } |
610 | }; |
611 | |
612 | } // end anonymous namespace |
613 | |
614 | bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) { |
615 | return llvm::none_of(Ins->users(), [=](User *U) -> bool { |
616 | auto *UserIns = dyn_cast<PHINode>(U); |
617 | RecurrenceDescriptor RD; |
618 | return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD); |
619 | }); |
620 | } |
621 | |
622 | bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader( |
623 | BasicBlock *BB) { |
624 | for (Instruction &I : *BB) { |
625 | // Load corresponding to reduction PHI's are safe while concluding if |
626 | // tightly nested. |
627 | if (LoadInst *L = dyn_cast<LoadInst>(&I)) { |
628 | if (!areAllUsesReductions(L, InnerLoop)) |
629 | return true; |
630 | } else if (I.mayHaveSideEffects() || I.mayReadFromMemory()) |
631 | return true; |
632 | } |
633 | return false; |
634 | } |
635 | |
636 | bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch( |
637 | BasicBlock *BB) { |
638 | for (Instruction &I : *BB) { |
639 | // Stores corresponding to reductions are safe while concluding if tightly |
640 | // nested. |
641 | if (StoreInst *L = dyn_cast<StoreInst>(&I)) { |
642 | if (!isa<PHINode>(L->getOperand(0))) |
643 | return true; |
644 | } else if (I.mayHaveSideEffects() || I.mayReadFromMemory()) |
645 | return true; |
646 | } |
647 | return false; |
648 | } |
649 | |
650 | bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { |
651 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); |
652 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
653 | BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); |
654 | |
655 | LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Checking if loops are tightly nested\n" ; } } while (false); |
656 | |
657 | // A perfectly nested loop will not have any branch in between the outer and |
658 | // inner block i.e. outer header will branch to either inner preheader and |
659 | // outerloop latch. |
660 | BranchInst *OuterLoopHeaderBI = |
661 | dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); |
662 | if (!OuterLoopHeaderBI) |
663 | return false; |
664 | |
665 | for (BasicBlock *Succ : OuterLoopHeaderBI->successors()) |
666 | if (Succ != InnerLoopPreHeader && Succ != OuterLoopLatch) |
667 | return false; |
668 | |
669 | LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Checking instructions in Loop header and Loop latch\n" ; } } while (false); |
670 | // We do not have any basic block in between now make sure the outer header |
671 | // and outer loop latch doesn't contain any unsafe instructions. |
672 | if (containsUnsafeInstructionsInHeader(OuterLoopHeader) || |
673 | containsUnsafeInstructionsInLatch(OuterLoopLatch)) |
674 | return false; |
675 | |
676 | LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops are perfectly nested\n" ; } } while (false); |
677 | // We have a perfect loop nest. |
678 | return true; |
679 | } |
680 | |
681 | bool LoopInterchangeLegality::isLoopStructureUnderstood( |
682 | PHINode *InnerInduction) { |
683 | unsigned Num = InnerInduction->getNumOperands(); |
684 | BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); |
685 | for (unsigned i = 0; i < Num; ++i) { |
686 | Value *Val = InnerInduction->getOperand(i); |
687 | if (isa<Constant>(Val)) |
688 | continue; |
689 | Instruction *I = dyn_cast<Instruction>(Val); |
690 | if (!I) |
691 | return false; |
692 | // TODO: Handle triangular loops. |
693 | // e.g. for(int i=0;i<N;i++) |
694 | // for(int j=i;j<N;j++) |
695 | unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i); |
696 | if (InnerInduction->getIncomingBlock(IncomBlockIndx) == |
697 | InnerLoopPreheader && |
698 | !OuterLoop->isLoopInvariant(I)) { |
699 | return false; |
700 | } |
701 | } |
702 | return true; |
703 | } |
704 | |
705 | bool LoopInterchangeLegality::findInductionAndReductions( |
706 | Loop *L, SmallVector<PHINode *, 8> &Inductions, |
707 | SmallVector<PHINode *, 8> &Reductions) { |
708 | if (!L->getLoopLatch() || !L->getLoopPredecessor()) |
709 | return false; |
710 | for (PHINode &PHI : L->getHeader()->phis()) { |
711 | RecurrenceDescriptor RD; |
712 | InductionDescriptor ID; |
713 | if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) |
714 | Inductions.push_back(&PHI); |
715 | else if (RecurrenceDescriptor::isReductionPHI(&PHI, L, RD)) |
716 | Reductions.push_back(&PHI); |
717 | else { |
718 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n" ; } } while (false) |
719 | dbgs() << "Failed to recognize PHI as an induction or reduction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n" ; } } while (false); |
720 | return false; |
721 | } |
722 | } |
723 | return true; |
724 | } |
725 | |
726 | static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) { |
727 | for (PHINode &PHI : Block->phis()) { |
728 | // Reduction lcssa phi will have only 1 incoming block that from loop latch. |
729 | if (PHI.getNumIncomingValues() > 1) |
730 | return false; |
731 | Instruction *Ins = dyn_cast<Instruction>(PHI.getIncomingValue(0)); |
732 | if (!Ins) |
733 | return false; |
734 | // Incoming value for lcssa phi's in outer loop exit can only be inner loop |
735 | // exits lcssa phi else it would not be tightly nested. |
736 | if (!isa<PHINode>(Ins) && isOuterLoopExitBlock) |
737 | return false; |
738 | } |
739 | return true; |
740 | } |
741 | |
742 | // This function indicates the current limitations in the transform as a result |
743 | // of which we do not proceed. |
744 | bool LoopInterchangeLegality::currentLimitations() { |
745 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
746 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); |
747 | |
748 | // transform currently expects the loop latches to also be the exiting |
749 | // blocks. |
750 | if (InnerLoop->getExitingBlock() != InnerLoopLatch || |
751 | OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() || |
752 | !isa<BranchInst>(InnerLoopLatch->getTerminator()) || |
753 | !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) { |
754 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not" << " supported currently.\n"; } } while (false) |
755 | dbgs() << "Loops where the latch is not the exiting block are not"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not" << " supported currently.\n"; } } while (false) |
756 | << " supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops where the latch is not the exiting block are not" << " supported currently.\n"; } } while (false); |
757 | ORE->emit([&]() { |
758 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ExitingNotLatch", |
759 | OuterLoop->getStartLoc(), |
760 | OuterLoop->getHeader()) |
761 | << "Loops where the latch is not the exiting block cannot be" |
762 | " interchange currently."; |
763 | }); |
764 | return true; |
765 | } |
766 | |
767 | PHINode *InnerInductionVar; |
768 | SmallVector<PHINode *, 8> Inductions; |
769 | SmallVector<PHINode *, 8> Reductions; |
770 | if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) { |
771 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes " << "are supported currently.\n"; } } while (false) |
772 | dbgs() << "Only inner loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes " << "are supported currently.\n"; } } while (false) |
773 | << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes " << "are supported currently.\n"; } } while (false); |
774 | ORE->emit([&]() { |
775 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner", |
776 | InnerLoop->getStartLoc(), |
777 | InnerLoop->getHeader()) |
778 | << "Only inner loops with induction or reduction PHI nodes can be" |
779 | " interchange currently."; |
780 | }); |
781 | return true; |
782 | } |
783 | |
784 | // TODO: Currently we handle only loops with 1 induction variable. |
785 | if (Inductions.size() != 1) { |
786 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "We currently only support loops with 1 induction variable." << "Failed to interchange due to current limitation\n" ; } } while (false) |
787 | dbgs() << "We currently only support loops with 1 induction variable."do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "We currently only support loops with 1 induction variable." << "Failed to interchange due to current limitation\n" ; } } while (false) |
788 | << "Failed to interchange due to current limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "We currently only support loops with 1 induction variable." << "Failed to interchange due to current limitation\n" ; } } while (false); |
789 | ORE->emit([&]() { |
790 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "MultiInductionInner", |
791 | InnerLoop->getStartLoc(), |
792 | InnerLoop->getHeader()) |
793 | << "Only inner loops with 1 induction variable can be " |
794 | "interchanged currently."; |
795 | }); |
796 | return true; |
797 | } |
798 | if (Reductions.size() > 0) |
799 | InnerLoopHasReduction = true; |
800 | |
801 | InnerInductionVar = Inductions.pop_back_val(); |
802 | Reductions.clear(); |
803 | if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) { |
804 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes " << "are supported currently.\n"; } } while (false) |
805 | dbgs() << "Only outer loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes " << "are supported currently.\n"; } } while (false) |
806 | << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes " << "are supported currently.\n"; } } while (false); |
807 | ORE->emit([&]() { |
808 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter", |
809 | OuterLoop->getStartLoc(), |
810 | OuterLoop->getHeader()) |
811 | << "Only outer loops with induction or reduction PHI nodes can be" |
812 | " interchanged currently."; |
813 | }); |
814 | return true; |
815 | } |
816 | |
817 | // Outer loop cannot have reduction because then loops will not be tightly |
818 | // nested. |
819 | if (!Reductions.empty()) { |
820 | LLVM_DEBUG(dbgs() << "Outer loops with reductions are not supported "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Outer loops with reductions are not supported " << "currently.\n"; } } while (false) |
821 | << "currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Outer loops with reductions are not supported " << "currently.\n"; } } while (false); |
822 | ORE->emit([&]() { |
823 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ReductionsOuter", |
824 | OuterLoop->getStartLoc(), |
825 | OuterLoop->getHeader()) |
826 | << "Outer loops with reductions cannot be interchangeed " |
827 | "currently."; |
828 | }); |
829 | return true; |
830 | } |
831 | // TODO: Currently we handle only loops with 1 induction variable. |
832 | if (Inductions.size() != 1) { |
833 | LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops with more than 1 induction variables are not " << "supported currently.\n"; } } while (false) |
834 | << "supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops with more than 1 induction variables are not " << "supported currently.\n"; } } while (false); |
835 | ORE->emit([&]() { |
836 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "MultiIndutionOuter", |
837 | OuterLoop->getStartLoc(), |
838 | OuterLoop->getHeader()) |
839 | << "Only outer loops with 1 induction variable can be " |
840 | "interchanged currently."; |
841 | }); |
842 | return true; |
843 | } |
844 | |
845 | // TODO: Triangular loops are not handled for now. |
846 | if (!isLoopStructureUnderstood(InnerInductionVar)) { |
847 | LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loop structure not understood by pass\n" ; } } while (false); |
848 | ORE->emit([&]() { |
849 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner", |
850 | InnerLoop->getStartLoc(), |
851 | InnerLoop->getHeader()) |
852 | << "Inner loop structure not understood currently."; |
853 | }); |
854 | return true; |
855 | } |
856 | |
857 | // TODO: We only handle LCSSA PHI's corresponding to reduction for now. |
858 | BasicBlock *InnerExit = InnerLoop->getExitBlock(); |
859 | if (!containsSafePHI(InnerExit, false)) { |
860 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n" ; } } while (false) |
861 | dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n" ; } } while (false); |
862 | ORE->emit([&]() { |
863 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoLCSSAPHIOuterInner", |
864 | InnerLoop->getStartLoc(), |
865 | InnerLoop->getHeader()) |
866 | << "Only inner loops with LCSSA PHIs can be interchange " |
867 | "currently."; |
868 | }); |
869 | return true; |
870 | } |
871 | |
872 | // TODO: Current limitation: Since we split the inner loop latch at the point |
873 | // were induction variable is incremented (induction.next); We cannot have |
874 | // more than 1 user of induction.next since it would result in broken code |
875 | // after split. |
876 | // e.g. |
877 | // for(i=0;i<N;i++) { |
878 | // for(j = 0;j<M;j++) { |
879 | // A[j+1][i+2] = A[j][i]+k; |
880 | // } |
881 | // } |
882 | Instruction *InnerIndexVarInc = nullptr; |
883 | if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader) |
884 | InnerIndexVarInc = |
885 | dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1)); |
886 | else |
887 | InnerIndexVarInc = |
888 | dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); |
889 | |
890 | if (!InnerIndexVarInc) { |
891 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Did not find an instruction to increment the induction " << "variable.\n"; } } while (false) |
892 | dbgs() << "Did not find an instruction to increment the induction "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Did not find an instruction to increment the induction " << "variable.\n"; } } while (false) |
893 | << "variable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Did not find an instruction to increment the induction " << "variable.\n"; } } while (false); |
894 | ORE->emit([&]() { |
895 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoIncrementInInner", |
896 | InnerLoop->getStartLoc(), |
897 | InnerLoop->getHeader()) |
898 | << "The inner loop does not increment the induction variable."; |
899 | }); |
900 | return true; |
901 | } |
902 | |
903 | // Since we split the inner loop latch on this induction variable. Make sure |
904 | // we do not have any instruction between the induction variable and branch |
905 | // instruction. |
906 | |
907 | bool FoundInduction = false; |
908 | for (const Instruction &I : |
909 | llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) { |
910 | if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) || |
911 | isa<ZExtInst>(I)) |
912 | continue; |
913 | |
914 | // We found an instruction. If this is not induction variable then it is not |
915 | // safe to split this loop latch. |
916 | if (!I.isIdenticalTo(InnerIndexVarInc)) { |
917 | LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Found unsupported instructions between induction " << "variable increment and branch.\n"; } } while (false ) |
918 | << "variable increment and branch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Found unsupported instructions between induction " << "variable increment and branch.\n"; } } while (false ); |
919 | ORE->emit([&]() { |
920 | return OptimizationRemarkMissed( |
921 | DEBUG_TYPE"loop-interchange", "UnsupportedInsBetweenInduction", |
922 | InnerLoop->getStartLoc(), InnerLoop->getHeader()) |
923 | << "Found unsupported instruction between induction variable " |
924 | "increment and branch."; |
925 | }); |
926 | return true; |
927 | } |
928 | |
929 | FoundInduction = true; |
930 | break; |
931 | } |
932 | // The loop latch ended and we didn't find the induction variable return as |
933 | // current limitation. |
934 | if (!FoundInduction) { |
935 | LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Did not find the induction variable.\n" ; } } while (false); |
936 | ORE->emit([&]() { |
937 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoIndutionVariable", |
938 | InnerLoop->getStartLoc(), |
939 | InnerLoop->getHeader()) |
940 | << "Did not find the induction variable."; |
941 | }); |
942 | return true; |
943 | } |
944 | return false; |
945 | } |
946 | |
947 | // We currently support LCSSA PHI nodes in the outer loop exit, if their |
948 | // incoming values do not come from the outer loop latch or if the |
949 | // outer loop latch has a single predecessor. In that case, the value will |
950 | // be available if both the inner and outer loop conditions are true, which |
951 | // will still be true after interchanging. If we have multiple predecessor, |
952 | // that may not be the case, e.g. because the outer loop latch may be executed |
953 | // if the inner loop is not executed. |
954 | static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { |
955 | BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock(); |
956 | for (PHINode &PHI : LoopNestExit->phis()) { |
957 | // FIXME: We currently are not able to detect floating point reductions |
958 | // and have to use floating point PHIs as a proxy to prevent |
959 | // interchanging in the presence of floating point reductions. |
960 | if (PHI.getType()->isFloatingPointTy()) |
961 | return false; |
962 | for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { |
963 | Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i)); |
964 | if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch()) |
965 | continue; |
966 | |
967 | // The incoming value is defined in the outer loop latch. Currently we |
968 | // only support that in case the outer loop latch has a single predecessor. |
969 | // This guarantees that the outer loop latch is executed if and only if |
970 | // the inner loop is executed (because tightlyNested() guarantees that the |
971 | // outer loop header only branches to the inner loop or the outer loop |
972 | // latch). |
973 | // FIXME: We could weaken this logic and allow multiple predecessors, |
974 | // if the values are produced outside the loop latch. We would need |
975 | // additional logic to update the PHI nodes in the exit block as |
976 | // well. |
977 | if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr) |
978 | return false; |
979 | } |
980 | } |
981 | return true; |
982 | } |
983 | |
984 | bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, |
985 | unsigned OuterLoopId, |
986 | CharMatrix &DepMatrix) { |
987 | if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { |
988 | LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << " due to dependence\n"; } } while (false ) |
989 | << " and OuterLoopId = " << OuterLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << " due to dependence\n"; } } while (false ) |
990 | << " due to dependence\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << " due to dependence\n"; } } while (false ); |
991 | ORE->emit([&]() { |
992 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence", |
993 | InnerLoop->getStartLoc(), |
994 | InnerLoop->getHeader()) |
995 | << "Cannot interchange loops due to dependences."; |
996 | }); |
997 | return false; |
998 | } |
999 | // Check if outer and inner loop contain legal instructions only. |
1000 | for (auto *BB : OuterLoop->blocks()) |
1001 | for (Instruction &I : BB->instructionsWithoutDebug()) |
1002 | if (CallInst *CI = dyn_cast<CallInst>(&I)) { |
1003 | // readnone functions do not prevent interchanging. |
1004 | if (CI->doesNotReadMemory()) |
1005 | continue; |
1006 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged " << "safely."; } } while (false) |
1007 | dbgs() << "Loops with call instructions cannot be interchanged "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged " << "safely."; } } while (false) |
1008 | << "safely.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged " << "safely."; } } while (false); |
1009 | ORE->emit([&]() { |
1010 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "CallInst", |
1011 | CI->getDebugLoc(), |
1012 | CI->getParent()) |
1013 | << "Cannot interchange loops due to call instruction."; |
1014 | }); |
1015 | |
1016 | return false; |
1017 | } |
1018 | |
1019 | // Create unique Preheaders if we already do not have one. |
1020 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); |
1021 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1022 | |
1023 | // Create a unique outer preheader - |
1024 | // 1) If OuterLoop preheader is not present. |
1025 | // 2) If OuterLoop Preheader is same as OuterLoop Header |
1026 | // 3) If OuterLoop Preheader is same as Header of the previous loop. |
1027 | // 4) If OuterLoop Preheader is Entry node. |
1028 | if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() || |
1029 | isa<PHINode>(OuterLoopPreHeader->begin()) || |
1030 | !OuterLoopPreHeader->getUniquePredecessor()) { |
1031 | OuterLoopPreHeader = |
1032 | InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA); |
1033 | } |
1034 | |
1035 | if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() || |
1036 | InnerLoopPreHeader == OuterLoop->getHeader()) { |
1037 | InnerLoopPreHeader = |
Value stored to 'InnerLoopPreHeader' is never read | |
1038 | InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA); |
1039 | } |
1040 | |
1041 | // TODO: The loops could not be interchanged due to current limitations in the |
1042 | // transform module. |
1043 | if (currentLimitations()) { |
1044 | LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Not legal because of current transform limitation\n" ; } } while (false); |
1045 | return false; |
1046 | } |
1047 | |
1048 | // Check if the loops are tightly nested. |
1049 | if (!tightlyNested(OuterLoop, InnerLoop)) { |
1050 | LLVM_DEBUG(dbgs() << "Loops not tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops not tightly nested\n" ; } } while (false); |
1051 | ORE->emit([&]() { |
1052 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested", |
1053 | InnerLoop->getStartLoc(), |
1054 | InnerLoop->getHeader()) |
1055 | << "Cannot interchange loops because they are not tightly " |
1056 | "nested."; |
1057 | }); |
1058 | return false; |
1059 | } |
1060 | |
1061 | if (!areLoopExitPHIsSupported(OuterLoop, InnerLoop)) { |
1062 | LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Found unsupported PHI nodes in outer loop exit.\n" ; } } while (false); |
1063 | ORE->emit([&]() { |
1064 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI", |
1065 | OuterLoop->getStartLoc(), |
1066 | OuterLoop->getHeader()) |
1067 | << "Found unsupported PHI node in loop exit."; |
1068 | }); |
1069 | return false; |
1070 | } |
1071 | |
1072 | return true; |
1073 | } |
1074 | |
1075 | int LoopInterchangeProfitability::getInstrOrderCost() { |
1076 | unsigned GoodOrder, BadOrder; |
1077 | BadOrder = GoodOrder = 0; |
1078 | for (BasicBlock *BB : InnerLoop->blocks()) { |
1079 | for (Instruction &Ins : *BB) { |
1080 | if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) { |
1081 | unsigned NumOp = GEP->getNumOperands(); |
1082 | bool FoundInnerInduction = false; |
1083 | bool FoundOuterInduction = false; |
1084 | for (unsigned i = 0; i < NumOp; ++i) { |
1085 | const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i)); |
1086 | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal); |
1087 | if (!AR) |
1088 | continue; |
1089 | |
1090 | // If we find the inner induction after an outer induction e.g. |
1091 | // for(int i=0;i<N;i++) |
1092 | // for(int j=0;j<N;j++) |
1093 | // A[i][j] = A[i-1][j-1]+k; |
1094 | // then it is a good order. |
1095 | if (AR->getLoop() == InnerLoop) { |
1096 | // We found an InnerLoop induction after OuterLoop induction. It is |
1097 | // a good order. |
1098 | FoundInnerInduction = true; |
1099 | if (FoundOuterInduction) { |
1100 | GoodOrder++; |
1101 | break; |
1102 | } |
1103 | } |
1104 | // If we find the outer induction after an inner induction e.g. |
1105 | // for(int i=0;i<N;i++) |
1106 | // for(int j=0;j<N;j++) |
1107 | // A[j][i] = A[j-1][i-1]+k; |
1108 | // then it is a bad order. |
1109 | if (AR->getLoop() == OuterLoop) { |
1110 | // We found an OuterLoop induction after InnerLoop induction. It is |
1111 | // a bad order. |
1112 | FoundOuterInduction = true; |
1113 | if (FoundInnerInduction) { |
1114 | BadOrder++; |
1115 | break; |
1116 | } |
1117 | } |
1118 | } |
1119 | } |
1120 | } |
1121 | } |
1122 | return GoodOrder - BadOrder; |
1123 | } |
1124 | |
1125 | static bool isProfitableForVectorization(unsigned InnerLoopId, |
1126 | unsigned OuterLoopId, |
1127 | CharMatrix &DepMatrix) { |
1128 | // TODO: Improve this heuristic to catch more cases. |
1129 | // If the inner loop is loop independent or doesn't carry any dependency it is |
1130 | // profitable to move this to outer position. |
1131 | for (auto &Row : DepMatrix) { |
1132 | if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I') |
1133 | return false; |
1134 | // TODO: We need to improve this heuristic. |
1135 | if (Row[OuterLoopId] != '=') |
1136 | return false; |
1137 | } |
1138 | // If outer loop has dependence and inner loop is loop independent then it is |
1139 | // profitable to interchange to enable parallelism. |
1140 | // If there are no dependences, interchanging will not improve anything. |
1141 | return !DepMatrix.empty(); |
1142 | } |
1143 | |
1144 | bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, |
1145 | unsigned OuterLoopId, |
1146 | CharMatrix &DepMatrix) { |
1147 | // TODO: Add better profitability checks. |
1148 | // e.g |
1149 | // 1) Construct dependency matrix and move the one with no loop carried dep |
1150 | // inside to enable vectorization. |
1151 | |
1152 | // This is rough cost estimation algorithm. It counts the good and bad order |
1153 | // of induction variables in the instruction and allows reordering if number |
1154 | // of bad orders is more than good. |
1155 | int Cost = getInstrOrderCost(); |
1156 | LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Cost = " << Cost << "\n"; } } while (false); |
1157 | if (Cost < -LoopInterchangeCostThreshold) |
1158 | return true; |
1159 | |
1160 | // It is not profitable as per current cache profitability model. But check if |
1161 | // we can move this loop outside to improve parallelism. |
1162 | if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) |
1163 | return true; |
1164 | |
1165 | ORE->emit([&]() { |
1166 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable", |
1167 | InnerLoop->getStartLoc(), |
1168 | InnerLoop->getHeader()) |
1169 | << "Interchanging loops is too costly (cost=" |
1170 | << ore::NV("Cost", Cost) << ", threshold=" |
1171 | << ore::NV("Threshold", LoopInterchangeCostThreshold) |
1172 | << ") and it does not improve parallelism."; |
1173 | }); |
1174 | return false; |
1175 | } |
1176 | |
1177 | void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, |
1178 | Loop *InnerLoop) { |
1179 | for (Loop *L : *OuterLoop) |
1180 | if (L == InnerLoop) { |
1181 | OuterLoop->removeChildLoop(L); |
1182 | return; |
1183 | } |
1184 | llvm_unreachable("Couldn't find loop")::llvm::llvm_unreachable_internal("Couldn't find loop", "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Scalar/LoopInterchange.cpp" , 1184); |
1185 | } |
1186 | |
1187 | /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the |
1188 | /// new inner and outer loop after interchanging: NewInner is the original |
1189 | /// outer loop and NewOuter is the original inner loop. |
1190 | /// |
1191 | /// Before interchanging, we have the following structure |
1192 | /// Outer preheader |
1193 | // Outer header |
1194 | // Inner preheader |
1195 | // Inner header |
1196 | // Inner body |
1197 | // Inner latch |
1198 | // outer bbs |
1199 | // Outer latch |
1200 | // |
1201 | // After interchanging: |
1202 | // Inner preheader |
1203 | // Inner header |
1204 | // Outer preheader |
1205 | // Outer header |
1206 | // Inner body |
1207 | // outer bbs |
1208 | // Outer latch |
1209 | // Inner latch |
1210 | void LoopInterchangeTransform::restructureLoops( |
1211 | Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader, |
1212 | BasicBlock *OrigOuterPreHeader) { |
1213 | Loop *OuterLoopParent = OuterLoop->getParentLoop(); |
1214 | // The original inner loop preheader moves from the new inner loop to |
1215 | // the parent loop, if there is one. |
1216 | NewInner->removeBlockFromLoop(OrigInnerPreHeader); |
1217 | LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent); |
1218 | |
1219 | // Switch the loop levels. |
1220 | if (OuterLoopParent) { |
1221 | // Remove the loop from its parent loop. |
1222 | removeChildLoop(OuterLoopParent, NewInner); |
1223 | removeChildLoop(NewInner, NewOuter); |
1224 | OuterLoopParent->addChildLoop(NewOuter); |
1225 | } else { |
1226 | removeChildLoop(NewInner, NewOuter); |
1227 | LI->changeTopLevelLoop(NewInner, NewOuter); |
1228 | } |
1229 | while (!NewOuter->empty()) |
1230 | NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin())); |
1231 | NewOuter->addChildLoop(NewInner); |
1232 | |
1233 | // BBs from the original inner loop. |
1234 | SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks()); |
1235 | |
1236 | // Add BBs from the original outer loop to the original inner loop (excluding |
1237 | // BBs already in inner loop) |
1238 | for (BasicBlock *BB : NewInner->blocks()) |
1239 | if (LI->getLoopFor(BB) == NewInner) |
1240 | NewOuter->addBlockEntry(BB); |
1241 | |
1242 | // Now remove inner loop header and latch from the new inner loop and move |
1243 | // other BBs (the loop body) to the new inner loop. |
1244 | BasicBlock *OuterHeader = NewOuter->getHeader(); |
1245 | BasicBlock *OuterLatch = NewOuter->getLoopLatch(); |
1246 | for (BasicBlock *BB : OrigInnerBBs) { |
1247 | // Nothing will change for BBs in child loops. |
1248 | if (LI->getLoopFor(BB) != NewOuter) |
1249 | continue; |
1250 | // Remove the new outer loop header and latch from the new inner loop. |
1251 | if (BB == OuterHeader || BB == OuterLatch) |
1252 | NewInner->removeBlockFromLoop(BB); |
1253 | else |
1254 | LI->changeLoopFor(BB, NewInner); |
1255 | } |
1256 | |
1257 | // The preheader of the original outer loop becomes part of the new |
1258 | // outer loop. |
1259 | NewOuter->addBlockEntry(OrigOuterPreHeader); |
1260 | LI->changeLoopFor(OrigOuterPreHeader, NewOuter); |
1261 | } |
1262 | |
1263 | bool LoopInterchangeTransform::transform() { |
1264 | bool Transformed = false; |
1265 | Instruction *InnerIndexVar; |
1266 | |
1267 | if (InnerLoop->getSubLoops().empty()) { |
1268 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1269 | LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Calling Split Inner Loop\n" ; } } while (false); |
1270 | PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); |
1271 | if (!InductionPHI) { |
1272 | LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed to find the point to split loop latch \n" ; } } while (false); |
1273 | return false; |
1274 | } |
1275 | |
1276 | if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) |
1277 | InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1)); |
1278 | else |
1279 | InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0)); |
1280 | |
1281 | // Ensure that InductionPHI is the first Phi node. |
1282 | if (&InductionPHI->getParent()->front() != InductionPHI) |
1283 | InductionPHI->moveBefore(&InductionPHI->getParent()->front()); |
1284 | |
1285 | // Split at the place were the induction variable is |
1286 | // incremented/decremented. |
1287 | // TODO: This splitting logic may not work always. Fix this. |
1288 | splitInnerLoopLatch(InnerIndexVar); |
1289 | LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "splitInnerLoopLatch done\n" ; } } while (false); |
1290 | |
1291 | // Splits the inner loops phi nodes out into a separate basic block. |
1292 | BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); |
1293 | SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); |
1294 | LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "splitting InnerLoopHeader done\n" ; } } while (false); |
1295 | } |
1296 | |
1297 | Transformed |= adjustLoopLinks(); |
1298 | if (!Transformed) { |
1299 | LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "adjustLoopLinks failed\n" ; } } while (false); |
1300 | return false; |
1301 | } |
1302 | |
1303 | return true; |
1304 | } |
1305 | |
1306 | void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { |
1307 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); |
1308 | BasicBlock *InnerLoopLatchPred = InnerLoopLatch; |
1309 | InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI); |
1310 | } |
1311 | |
1312 | /// \brief Move all instructions except the terminator from FromBB right before |
1313 | /// InsertBefore |
1314 | static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { |
1315 | auto &ToList = InsertBefore->getParent()->getInstList(); |
1316 | auto &FromList = FromBB->getInstList(); |
1317 | |
1318 | ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(), |
1319 | FromBB->getTerminator()->getIterator()); |
1320 | } |
1321 | |
1322 | void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock, |
1323 | BasicBlock *OldPred, |
1324 | BasicBlock *NewPred) { |
1325 | for (PHINode &PHI : CurrBlock->phis()) { |
1326 | unsigned Num = PHI.getNumIncomingValues(); |
1327 | for (unsigned i = 0; i < Num; ++i) { |
1328 | if (PHI.getIncomingBlock(i) == OldPred) |
1329 | PHI.setIncomingBlock(i, NewPred); |
1330 | } |
1331 | } |
1332 | } |
1333 | |
1334 | /// Update BI to jump to NewBB instead of OldBB. Records updates to |
1335 | /// the dominator tree in DTUpdates, if DT should be preserved. |
1336 | static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, |
1337 | BasicBlock *NewBB, |
1338 | std::vector<DominatorTree::UpdateType> &DTUpdates) { |
1339 | assert(llvm::count_if(BI->successors(),(static_cast <bool> (llvm::count_if(BI->successors() , [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && "BI must jump to OldBB at most once.") ? void (0) : __assert_fail ("llvm::count_if(BI->successors(), [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && \"BI must jump to OldBB at most once.\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Scalar/LoopInterchange.cpp" , 1341, __extension__ __PRETTY_FUNCTION__)) |
1340 | [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 &&(static_cast <bool> (llvm::count_if(BI->successors() , [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && "BI must jump to OldBB at most once.") ? void (0) : __assert_fail ("llvm::count_if(BI->successors(), [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && \"BI must jump to OldBB at most once.\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Scalar/LoopInterchange.cpp" , 1341, __extension__ __PRETTY_FUNCTION__)) |
1341 | "BI must jump to OldBB at most once.")(static_cast <bool> (llvm::count_if(BI->successors() , [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && "BI must jump to OldBB at most once.") ? void (0) : __assert_fail ("llvm::count_if(BI->successors(), [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && \"BI must jump to OldBB at most once.\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Scalar/LoopInterchange.cpp" , 1341, __extension__ __PRETTY_FUNCTION__)); |
1342 | for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) { |
1343 | if (BI->getSuccessor(i) == OldBB) { |
1344 | BI->setSuccessor(i, NewBB); |
1345 | |
1346 | DTUpdates.push_back( |
1347 | {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB}); |
1348 | DTUpdates.push_back( |
1349 | {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB}); |
1350 | break; |
1351 | } |
1352 | } |
1353 | } |
1354 | |
1355 | bool LoopInterchangeTransform::adjustLoopBranches() { |
1356 | LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "adjustLoopBranches called\n" ; } } while (false); |
1357 | std::vector<DominatorTree::UpdateType> DTUpdates; |
1358 | |
1359 | // Adjust the loop preheader |
1360 | BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); |
1361 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); |
1362 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); |
1363 | BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); |
1364 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); |
1365 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1366 | BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); |
1367 | BasicBlock *InnerLoopLatchPredecessor = |
1368 | InnerLoopLatch->getUniquePredecessor(); |
1369 | BasicBlock *InnerLoopLatchSuccessor; |
1370 | BasicBlock *OuterLoopLatchSuccessor; |
1371 | |
1372 | BranchInst *OuterLoopLatchBI = |
1373 | dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); |
1374 | BranchInst *InnerLoopLatchBI = |
1375 | dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); |
1376 | BranchInst *OuterLoopHeaderBI = |
1377 | dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); |
1378 | BranchInst *InnerLoopHeaderBI = |
1379 | dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); |
1380 | |
1381 | if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || |
1382 | !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || |
1383 | !InnerLoopHeaderBI) |
1384 | return false; |
1385 | |
1386 | BranchInst *InnerLoopLatchPredecessorBI = |
1387 | dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); |
1388 | BranchInst *OuterLoopPredecessorBI = |
1389 | dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); |
1390 | |
1391 | if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) |
1392 | return false; |
1393 | BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); |
1394 | if (!InnerLoopHeaderSuccessor) |
1395 | return false; |
1396 | |
1397 | // Adjust Loop Preheader and headers |
1398 | updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader, |
1399 | InnerLoopPreHeader, DTUpdates); |
1400 | updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates); |
1401 | updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, |
1402 | InnerLoopHeaderSuccessor, DTUpdates); |
1403 | |
1404 | // Adjust reduction PHI's now that the incoming block has changed. |
1405 | updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, |
1406 | OuterLoopHeader); |
1407 | |
1408 | updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor, |
1409 | OuterLoopPreHeader, DTUpdates); |
1410 | |
1411 | // -------------Adjust loop latches----------- |
1412 | if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) |
1413 | InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); |
1414 | else |
1415 | InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); |
1416 | |
1417 | updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch, |
1418 | InnerLoopLatchSuccessor, DTUpdates); |
1419 | |
1420 | // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with |
1421 | // the value and remove this PHI node from inner loop. |
1422 | SmallVector<PHINode *, 8> LcssaVec; |
1423 | for (PHINode &P : InnerLoopLatchSuccessor->phis()) |
1424 | LcssaVec.push_back(&P); |
1425 | |
1426 | for (PHINode *P : LcssaVec) { |
1427 | Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch); |
1428 | P->replaceAllUsesWith(Incoming); |
1429 | P->eraseFromParent(); |
1430 | } |
1431 | |
1432 | if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) |
1433 | OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); |
1434 | else |
1435 | OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); |
1436 | |
1437 | updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor, |
1438 | OuterLoopLatchSuccessor, DTUpdates); |
1439 | updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch, |
1440 | DTUpdates); |
1441 | |
1442 | updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); |
1443 | |
1444 | DT->applyUpdates(DTUpdates); |
1445 | restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader, |
1446 | OuterLoopPreHeader); |
1447 | |
1448 | // Now update the reduction PHIs in the inner and outer loop headers. |
1449 | SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs; |
1450 | for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1)) |
1451 | InnerLoopPHIs.push_back(cast<PHINode>(&PHI)); |
1452 | for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1)) |
1453 | OuterLoopPHIs.push_back(cast<PHINode>(&PHI)); |
1454 | |
1455 | for (PHINode *PHI : OuterLoopPHIs) |
1456 | PHI->moveBefore(InnerLoopHeader->getFirstNonPHI()); |
1457 | |
1458 | // Move the PHI nodes from the inner loop header to the outer loop header. |
1459 | // We have to deal with one kind of PHI nodes: |
1460 | // 1) PHI nodes that are part of inner loop-only reductions. |
1461 | // We only have to move the PHI node and update the incoming blocks. |
1462 | for (PHINode *PHI : InnerLoopPHIs) { |
1463 | PHI->moveBefore(OuterLoopHeader->getFirstNonPHI()); |
1464 | for (BasicBlock *InBB : PHI->blocks()) { |
1465 | if (InnerLoop->contains(InBB)) |
1466 | continue; |
1467 | |
1468 | assert(!isa<PHINode>(PHI->getIncomingValueForBlock(InBB)) &&(static_cast <bool> (!isa<PHINode>(PHI->getIncomingValueForBlock (InBB)) && "Unexpected incoming PHI node, reductions in outer loop are not " "supported yet") ? void (0) : __assert_fail ("!isa<PHINode>(PHI->getIncomingValueForBlock(InBB)) && \"Unexpected incoming PHI node, reductions in outer loop are not \" \"supported yet\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Scalar/LoopInterchange.cpp" , 1470, __extension__ __PRETTY_FUNCTION__)) |
1469 | "Unexpected incoming PHI node, reductions in outer loop are not "(static_cast <bool> (!isa<PHINode>(PHI->getIncomingValueForBlock (InBB)) && "Unexpected incoming PHI node, reductions in outer loop are not " "supported yet") ? void (0) : __assert_fail ("!isa<PHINode>(PHI->getIncomingValueForBlock(InBB)) && \"Unexpected incoming PHI node, reductions in outer loop are not \" \"supported yet\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Scalar/LoopInterchange.cpp" , 1470, __extension__ __PRETTY_FUNCTION__)) |
1470 | "supported yet")(static_cast <bool> (!isa<PHINode>(PHI->getIncomingValueForBlock (InBB)) && "Unexpected incoming PHI node, reductions in outer loop are not " "supported yet") ? void (0) : __assert_fail ("!isa<PHINode>(PHI->getIncomingValueForBlock(InBB)) && \"Unexpected incoming PHI node, reductions in outer loop are not \" \"supported yet\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Scalar/LoopInterchange.cpp" , 1470, __extension__ __PRETTY_FUNCTION__)); |
1471 | PHI->replaceAllUsesWith(PHI->getIncomingValueForBlock(InBB)); |
1472 | PHI->eraseFromParent(); |
1473 | break; |
1474 | } |
1475 | } |
1476 | |
1477 | // Update the incoming blocks for moved PHI nodes. |
1478 | updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader); |
1479 | updateIncomingBlock(OuterLoopHeader, InnerLoopLatch, OuterLoopLatch); |
1480 | updateIncomingBlock(InnerLoopHeader, OuterLoopPreHeader, InnerLoopPreHeader); |
1481 | updateIncomingBlock(InnerLoopHeader, OuterLoopLatch, InnerLoopLatch); |
1482 | |
1483 | return true; |
1484 | } |
1485 | |
1486 | void LoopInterchangeTransform::adjustLoopPreheaders() { |
1487 | // We have interchanged the preheaders so we need to interchange the data in |
1488 | // the preheader as well. |
1489 | // This is because the content of inner preheader was previously executed |
1490 | // inside the outer loop. |
1491 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); |
1492 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1493 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); |
1494 | BranchInst *InnerTermBI = |
1495 | cast<BranchInst>(InnerLoopPreHeader->getTerminator()); |
1496 | |
1497 | // These instructions should now be executed inside the loop. |
1498 | // Move instruction into a new block after outer header. |
1499 | moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator()); |
1500 | // These instructions were not executed previously in the loop so move them to |
1501 | // the older inner loop preheader. |
1502 | moveBBContents(OuterLoopPreHeader, InnerTermBI); |
1503 | } |
1504 | |
1505 | bool LoopInterchangeTransform::adjustLoopLinks() { |
1506 | // Adjust all branches in the inner and outer loop. |
1507 | bool Changed = adjustLoopBranches(); |
1508 | if (Changed) |
1509 | adjustLoopPreheaders(); |
1510 | return Changed; |
1511 | } |
1512 | |
1513 | char LoopInterchange::ID = 0; |
1514 | |
1515 | INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",static void *initializeLoopInterchangePassOnce(PassRegistry & Registry) { |
1516 | "Interchanges loops for cache reuse", false, false)static void *initializeLoopInterchangePassOnce(PassRegistry & Registry) { |
1517 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry); |
1518 | INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)initializeDependenceAnalysisWrapperPassPass(Registry); |
1519 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry); |
1520 | INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry); |
1521 | INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry); |
1522 | INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry); |
1523 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry); |
1524 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)initializeOptimizationRemarkEmitterWrapperPassPass(Registry); |
1525 | |
1526 | INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse" , "loop-interchange", &LoopInterchange::ID, PassInfo::NormalCtor_t (callDefaultCtor<LoopInterchange>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeLoopInterchangePassFlag; void llvm::initializeLoopInterchangePass (PassRegistry &Registry) { llvm::call_once(InitializeLoopInterchangePassFlag , initializeLoopInterchangePassOnce, std::ref(Registry)); } |
1527 | "Interchanges loops for cache reuse", false, false)PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse" , "loop-interchange", &LoopInterchange::ID, PassInfo::NormalCtor_t (callDefaultCtor<LoopInterchange>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeLoopInterchangePassFlag; void llvm::initializeLoopInterchangePass (PassRegistry &Registry) { llvm::call_once(InitializeLoopInterchangePassFlag , initializeLoopInterchangePassOnce, std::ref(Registry)); } |
1528 | |
1529 | Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); } |