File: | lib/Transforms/Scalar/LoopInterchange.cpp |
Warning: | line 1002, column 5 Value stored to 'OuterLoopPreHeader' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- LoopInterchange.cpp - Loop interchange pass-------------------------===// |
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | // |
10 | // This Pass handles loop interchange transform. |
11 | // This pass interchanges loops to provide a more cache-friendly memory access |
12 | // patterns. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "llvm/ADT/STLExtras.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/ADT/StringRef.h" |
19 | #include "llvm/Analysis/AliasAnalysis.h" |
20 | #include "llvm/Analysis/DependenceAnalysis.h" |
21 | #include "llvm/Analysis/LoopInfo.h" |
22 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
23 | #include "llvm/Analysis/ScalarEvolution.h" |
24 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
25 | #include "llvm/IR/BasicBlock.h" |
26 | #include "llvm/IR/Constants.h" |
27 | #include "llvm/IR/DiagnosticInfo.h" |
28 | #include "llvm/IR/Dominators.h" |
29 | #include "llvm/IR/Function.h" |
30 | #include "llvm/IR/InstrTypes.h" |
31 | #include "llvm/IR/Instruction.h" |
32 | #include "llvm/IR/Instructions.h" |
33 | #include "llvm/IR/Type.h" |
34 | #include "llvm/IR/User.h" |
35 | #include "llvm/IR/Value.h" |
36 | #include "llvm/Pass.h" |
37 | #include "llvm/Support/Casting.h" |
38 | #include "llvm/Support/CommandLine.h" |
39 | #include "llvm/Support/Debug.h" |
40 | #include "llvm/Support/ErrorHandling.h" |
41 | #include "llvm/Support/raw_ostream.h" |
42 | #include "llvm/Transforms/Scalar.h" |
43 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
44 | #include "llvm/Transforms/Utils/LoopUtils.h" |
45 | #include <cassert> |
46 | #include <utility> |
47 | #include <vector> |
48 | |
49 | using namespace llvm; |
50 | |
51 | #define DEBUG_TYPE"loop-interchange" "loop-interchange" |
52 | |
53 | static cl::opt<int> LoopInterchangeCostThreshold( |
54 | "loop-interchange-threshold", cl::init(0), cl::Hidden, |
55 | cl::desc("Interchange if you gain more than this number")); |
56 | |
57 | namespace { |
58 | |
59 | using LoopVector = SmallVector<Loop *, 8>; |
60 | |
61 | // TODO: Check if we can use a sparse matrix here. |
62 | using CharMatrix = std::vector<std::vector<char>>; |
63 | |
64 | } // end anonymous namespace |
65 | |
66 | // Maximum number of dependencies that can be handled in the dependency matrix. |
67 | static const unsigned MaxMemInstrCount = 100; |
68 | |
69 | // Maximum loop depth supported. |
70 | static const unsigned MaxLoopNestDepth = 10; |
71 | |
72 | #ifdef DUMP_DEP_MATRICIES |
73 | static void printDepMatrix(CharMatrix &DepMatrix) { |
74 | for (auto &Row : DepMatrix) { |
75 | for (auto D : Row) |
76 | DEBUG(dbgs() << D << " ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << D << " "; } } while (false); |
77 | DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "\n"; } } while (false ); |
78 | } |
79 | } |
80 | #endif |
81 | |
82 | static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, |
83 | Loop *L, DependenceInfo *DI) { |
84 | using ValueVector = SmallVector<Value *, 16>; |
85 | |
86 | ValueVector MemInstr; |
87 | |
88 | // For each block. |
89 | for (BasicBlock *BB : L->blocks()) { |
90 | // Scan the BB and collect legal loads and stores. |
91 | for (Instruction &I : *BB) { |
92 | if (!isa<Instruction>(I)) |
93 | return false; |
94 | if (auto *Ld = dyn_cast<LoadInst>(&I)) { |
95 | if (!Ld->isSimple()) |
96 | return false; |
97 | MemInstr.push_back(&I); |
98 | } else if (auto *St = dyn_cast<StoreInst>(&I)) { |
99 | if (!St->isSimple()) |
100 | return false; |
101 | MemInstr.push_back(&I); |
102 | } |
103 | } |
104 | } |
105 | |
106 | DEBUG(dbgs() << "Found " << MemInstr.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Found " << MemInstr .size() << " Loads and Stores to analyze\n"; } } while ( false) |
107 | << " Loads and Stores to analyze\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Found " << MemInstr .size() << " Loads and Stores to analyze\n"; } } while ( false); |
108 | |
109 | ValueVector::iterator I, IE, J, JE; |
110 | |
111 | for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { |
112 | for (J = I, JE = MemInstr.end(); J != JE; ++J) { |
113 | std::vector<char> Dep; |
114 | Instruction *Src = cast<Instruction>(*I); |
115 | Instruction *Dst = cast<Instruction>(*J); |
116 | if (Src == Dst) |
117 | continue; |
118 | // Ignore Input dependencies. |
119 | if (isa<LoadInst>(Src) && isa<LoadInst>(Dst)) |
120 | continue; |
121 | // Track Output, Flow, and Anti dependencies. |
122 | if (auto D = DI->depends(Src, Dst, true)) { |
123 | assert(D->isOrdered() && "Expected an output, flow or anti dep.")(static_cast <bool> (D->isOrdered() && "Expected an output, flow or anti dep." ) ? void (0) : __assert_fail ("D->isOrdered() && \"Expected an output, flow or anti dep.\"" , "/build/llvm-toolchain-snapshot-7~svn326551/lib/Transforms/Scalar/LoopInterchange.cpp" , 123, __extension__ __PRETTY_FUNCTION__)); |
124 | DEBUG(StringRef DepType =do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; dbgs() << "Found " << DepType << " dependency between Src and Dst\n" << " Src:" << *Src << "\n Dst:" << * Dst << '\n'; } } while (false) |
125 | D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; dbgs() << "Found " << DepType << " dependency between Src and Dst\n" << " Src:" << *Src << "\n Dst:" << * Dst << '\n'; } } while (false) |
126 | dbgs() << "Found " << DepTypedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; dbgs() << "Found " << DepType << " dependency between Src and Dst\n" << " Src:" << *Src << "\n Dst:" << * Dst << '\n'; } } while (false) |
127 | << " dependency between Src and Dst\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; dbgs() << "Found " << DepType << " dependency between Src and Dst\n" << " Src:" << *Src << "\n Dst:" << * Dst << '\n'; } } while (false) |
128 | << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; dbgs() << "Found " << DepType << " dependency between Src and Dst\n" << " Src:" << *Src << "\n Dst:" << * Dst << '\n'; } } while (false); |
129 | unsigned Levels = D->getLevels(); |
130 | char Direction; |
131 | for (unsigned II = 1; II <= Levels; ++II) { |
132 | const SCEV *Distance = D->getDistance(II); |
133 | const SCEVConstant *SCEVConst = |
134 | dyn_cast_or_null<SCEVConstant>(Distance); |
135 | if (SCEVConst) { |
136 | const ConstantInt *CI = SCEVConst->getValue(); |
137 | if (CI->isNegative()) |
138 | Direction = '<'; |
139 | else if (CI->isZero()) |
140 | Direction = '='; |
141 | else |
142 | Direction = '>'; |
143 | Dep.push_back(Direction); |
144 | } else if (D->isScalar(II)) { |
145 | Direction = 'S'; |
146 | Dep.push_back(Direction); |
147 | } else { |
148 | unsigned Dir = D->getDirection(II); |
149 | if (Dir == Dependence::DVEntry::LT || |
150 | Dir == Dependence::DVEntry::LE) |
151 | Direction = '<'; |
152 | else if (Dir == Dependence::DVEntry::GT || |
153 | Dir == Dependence::DVEntry::GE) |
154 | Direction = '>'; |
155 | else if (Dir == Dependence::DVEntry::EQ) |
156 | Direction = '='; |
157 | else |
158 | Direction = '*'; |
159 | Dep.push_back(Direction); |
160 | } |
161 | } |
162 | while (Dep.size() != Level) { |
163 | Dep.push_back('I'); |
164 | } |
165 | |
166 | DepMatrix.push_back(Dep); |
167 | if (DepMatrix.size() > MaxMemInstrCount) { |
168 | DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Cannot handle more than " << MaxMemInstrCount << " dependencies inside loop\n" ; } } while (false) |
169 | << " dependencies inside loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Cannot handle more than " << MaxMemInstrCount << " dependencies inside loop\n" ; } } while (false); |
170 | return false; |
171 | } |
172 | } |
173 | } |
174 | } |
175 | |
176 | return true; |
177 | } |
178 | |
179 | // A loop is moved from index 'from' to an index 'to'. Update the Dependence |
180 | // matrix by exchanging the two columns. |
181 | static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, |
182 | unsigned ToIndx) { |
183 | unsigned numRows = DepMatrix.size(); |
184 | for (unsigned i = 0; i < numRows; ++i) { |
185 | char TmpVal = DepMatrix[i][ToIndx]; |
186 | DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx]; |
187 | DepMatrix[i][FromIndx] = TmpVal; |
188 | } |
189 | } |
190 | |
191 | // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is |
192 | // '>' |
193 | static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, |
194 | unsigned Column) { |
195 | for (unsigned i = 0; i <= Column; ++i) { |
196 | if (DepMatrix[Row][i] == '<') |
197 | return false; |
198 | if (DepMatrix[Row][i] == '>') |
199 | return true; |
200 | } |
201 | // All dependencies were '=','S' or 'I' |
202 | return false; |
203 | } |
204 | |
205 | // Checks if no dependence exist in the dependency matrix in Row before Column. |
206 | static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, |
207 | unsigned Column) { |
208 | for (unsigned i = 0; i < Column; ++i) { |
209 | if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' && |
210 | DepMatrix[Row][i] != 'I') |
211 | return false; |
212 | } |
213 | return true; |
214 | } |
215 | |
216 | static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, |
217 | unsigned OuterLoopId, char InnerDep, |
218 | char OuterDep) { |
219 | if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId)) |
220 | return false; |
221 | |
222 | if (InnerDep == OuterDep) |
223 | return true; |
224 | |
225 | // It is legal to interchange if and only if after interchange no row has a |
226 | // '>' direction as the leftmost non-'='. |
227 | |
228 | if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I') |
229 | return true; |
230 | |
231 | if (InnerDep == '<') |
232 | return true; |
233 | |
234 | if (InnerDep == '>') { |
235 | // If OuterLoopId represents outermost loop then interchanging will make the |
236 | // 1st dependency as '>' |
237 | if (OuterLoopId == 0) |
238 | return false; |
239 | |
240 | // If all dependencies before OuterloopId are '=','S'or 'I'. Then |
241 | // interchanging will result in this row having an outermost non '=' |
242 | // dependency of '>' |
243 | if (!containsNoDependence(DepMatrix, Row, OuterLoopId)) |
244 | return true; |
245 | } |
246 | |
247 | return false; |
248 | } |
249 | |
250 | // Checks if it is legal to interchange 2 loops. |
251 | // [Theorem] A permutation of the loops in a perfect nest is legal if and only |
252 | // if the direction matrix, after the same permutation is applied to its |
253 | // columns, has no ">" direction as the leftmost non-"=" direction in any row. |
254 | static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, |
255 | unsigned InnerLoopId, |
256 | unsigned OuterLoopId) { |
257 | unsigned NumRows = DepMatrix.size(); |
258 | // For each row check if it is valid to interchange. |
259 | for (unsigned Row = 0; Row < NumRows; ++Row) { |
260 | char InnerDep = DepMatrix[Row][InnerLoopId]; |
261 | char OuterDep = DepMatrix[Row][OuterLoopId]; |
262 | if (InnerDep == '*' || OuterDep == '*') |
263 | return false; |
264 | if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep)) |
265 | return false; |
266 | } |
267 | return true; |
268 | } |
269 | |
270 | static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) { |
271 | DEBUG(dbgs() << "Calling populateWorklist on Func: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: " << L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getName() << '\n' ; } } while (false) |
272 | << L.getHeader()->getParent()->getName() << " Loop: %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: " << L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getName() << '\n' ; } } while (false) |
273 | << L.getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Calling populateWorklist on Func: " << L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getName() << '\n' ; } } while (false); |
274 | LoopVector LoopList; |
275 | Loop *CurrentLoop = &L; |
276 | const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); |
277 | while (!Vec->empty()) { |
278 | // The current loop has multiple subloops in it hence it is not tightly |
279 | // nested. |
280 | // Discard all loops above it added into Worklist. |
281 | if (Vec->size() != 1) { |
282 | LoopList.clear(); |
283 | return; |
284 | } |
285 | LoopList.push_back(CurrentLoop); |
286 | CurrentLoop = Vec->front(); |
287 | Vec = &CurrentLoop->getSubLoops(); |
288 | } |
289 | LoopList.push_back(CurrentLoop); |
290 | V.push_back(std::move(LoopList)); |
291 | } |
292 | |
293 | static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { |
294 | PHINode *InnerIndexVar = L->getCanonicalInductionVariable(); |
295 | if (InnerIndexVar) |
296 | return InnerIndexVar; |
297 | if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr) |
298 | return nullptr; |
299 | for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { |
300 | PHINode *PhiVar = cast<PHINode>(I); |
301 | Type *PhiTy = PhiVar->getType(); |
302 | if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && |
303 | !PhiTy->isPointerTy()) |
304 | return nullptr; |
305 | const SCEVAddRecExpr *AddRec = |
306 | dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar)); |
307 | if (!AddRec || !AddRec->isAffine()) |
308 | continue; |
309 | const SCEV *Step = AddRec->getStepRecurrence(*SE); |
310 | if (!isa<SCEVConstant>(Step)) |
311 | continue; |
312 | // Found the induction variable. |
313 | // FIXME: Handle loops with more than one induction variable. Note that, |
314 | // currently, legality makes sure we have only one induction variable. |
315 | return PhiVar; |
316 | } |
317 | return nullptr; |
318 | } |
319 | |
320 | namespace { |
321 | |
322 | /// LoopInterchangeLegality checks if it is legal to interchange the loop. |
323 | class LoopInterchangeLegality { |
324 | public: |
325 | LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, |
326 | LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA, |
327 | OptimizationRemarkEmitter *ORE) |
328 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), |
329 | PreserveLCSSA(PreserveLCSSA), ORE(ORE) {} |
330 | |
331 | /// Check if the loops can be interchanged. |
332 | bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, |
333 | CharMatrix &DepMatrix); |
334 | |
335 | /// Check if the loop structure is understood. We do not handle triangular |
336 | /// loops for now. |
337 | bool isLoopStructureUnderstood(PHINode *InnerInductionVar); |
338 | |
339 | bool currentLimitations(); |
340 | |
341 | bool hasInnerLoopReduction() { return InnerLoopHasReduction; } |
342 | |
343 | private: |
344 | bool tightlyNested(Loop *Outer, Loop *Inner); |
345 | bool containsUnsafeInstructionsInHeader(BasicBlock *BB); |
346 | bool areAllUsesReductions(Instruction *Ins, Loop *L); |
347 | bool containsUnsafeInstructionsInLatch(BasicBlock *BB); |
348 | bool findInductionAndReductions(Loop *L, |
349 | SmallVector<PHINode *, 8> &Inductions, |
350 | SmallVector<PHINode *, 8> &Reductions); |
351 | |
352 | Loop *OuterLoop; |
353 | Loop *InnerLoop; |
354 | |
355 | ScalarEvolution *SE; |
356 | LoopInfo *LI; |
357 | DominatorTree *DT; |
358 | bool PreserveLCSSA; |
359 | |
360 | /// Interface to emit optimization remarks. |
361 | OptimizationRemarkEmitter *ORE; |
362 | |
363 | bool InnerLoopHasReduction = false; |
364 | }; |
365 | |
366 | /// LoopInterchangeProfitability checks if it is profitable to interchange the |
367 | /// loop. |
368 | class LoopInterchangeProfitability { |
369 | public: |
370 | LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE, |
371 | OptimizationRemarkEmitter *ORE) |
372 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} |
373 | |
374 | /// Check if the loop interchange is profitable. |
375 | bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, |
376 | CharMatrix &DepMatrix); |
377 | |
378 | private: |
379 | int getInstrOrderCost(); |
380 | |
381 | Loop *OuterLoop; |
382 | Loop *InnerLoop; |
383 | |
384 | /// Scev analysis. |
385 | ScalarEvolution *SE; |
386 | |
387 | /// Interface to emit optimization remarks. |
388 | OptimizationRemarkEmitter *ORE; |
389 | }; |
390 | |
391 | /// LoopInterchangeTransform interchanges the loop. |
392 | class LoopInterchangeTransform { |
393 | public: |
394 | LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, |
395 | LoopInfo *LI, DominatorTree *DT, |
396 | BasicBlock *LoopNestExit, |
397 | bool InnerLoopContainsReductions) |
398 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), |
399 | LoopExit(LoopNestExit), |
400 | InnerLoopHasReduction(InnerLoopContainsReductions) {} |
401 | |
402 | /// Interchange OuterLoop and InnerLoop. |
403 | bool transform(); |
404 | void restructureLoops(Loop *InnerLoop, Loop *OuterLoop); |
405 | void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); |
406 | |
407 | private: |
408 | void splitInnerLoopLatch(Instruction *); |
409 | void splitInnerLoopHeader(); |
410 | bool adjustLoopLinks(); |
411 | void adjustLoopPreheaders(); |
412 | bool adjustLoopBranches(); |
413 | void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred, |
414 | BasicBlock *NewPred); |
415 | |
416 | Loop *OuterLoop; |
417 | Loop *InnerLoop; |
418 | |
419 | /// Scev analysis. |
420 | ScalarEvolution *SE; |
421 | |
422 | LoopInfo *LI; |
423 | DominatorTree *DT; |
424 | BasicBlock *LoopExit; |
425 | bool InnerLoopHasReduction; |
426 | }; |
427 | |
428 | // Main LoopInterchange Pass. |
429 | struct LoopInterchange : public FunctionPass { |
430 | static char ID; |
431 | ScalarEvolution *SE = nullptr; |
432 | LoopInfo *LI = nullptr; |
433 | DependenceInfo *DI = nullptr; |
434 | DominatorTree *DT = nullptr; |
435 | bool PreserveLCSSA; |
436 | |
437 | /// Interface to emit optimization remarks. |
438 | OptimizationRemarkEmitter *ORE; |
439 | |
440 | LoopInterchange() : FunctionPass(ID) { |
441 | initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); |
442 | } |
443 | |
444 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
445 | AU.addRequired<ScalarEvolutionWrapperPass>(); |
446 | AU.addRequired<AAResultsWrapperPass>(); |
447 | AU.addRequired<DominatorTreeWrapperPass>(); |
448 | AU.addRequired<LoopInfoWrapperPass>(); |
449 | AU.addRequired<DependenceAnalysisWrapperPass>(); |
450 | AU.addRequiredID(LoopSimplifyID); |
451 | AU.addRequiredID(LCSSAID); |
452 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
453 | |
454 | AU.addPreserved<DominatorTreeWrapperPass>(); |
455 | } |
456 | |
457 | bool runOnFunction(Function &F) override { |
458 | if (skipFunction(F)) |
459 | return false; |
460 | |
461 | SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); |
462 | LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
463 | DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI(); |
464 | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
465 | ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); |
466 | PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); |
467 | |
468 | // Build up a worklist of loop pairs to analyze. |
469 | SmallVector<LoopVector, 8> Worklist; |
470 | |
471 | for (Loop *L : *LI) |
472 | populateWorklist(*L, Worklist); |
473 | |
474 | DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Worklist size = " << Worklist.size() << "\n"; } } while (false); |
475 | bool Changed = true; |
476 | while (!Worklist.empty()) { |
477 | LoopVector LoopList = Worklist.pop_back_val(); |
478 | Changed = processLoopList(LoopList, F); |
479 | } |
480 | return Changed; |
481 | } |
482 | |
483 | bool isComputableLoopNest(LoopVector LoopList) { |
484 | for (Loop *L : LoopList) { |
485 | const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); |
486 | if (ExitCountOuter == SE->getCouldNotCompute()) { |
487 | DEBUG(dbgs() << "Couldn't compute backedge count\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Couldn't compute backedge count\n" ; } } while (false); |
488 | return false; |
489 | } |
490 | if (L->getNumBackEdges() != 1) { |
491 | DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "NumBackEdges is not equal to 1\n" ; } } while (false); |
492 | return false; |
493 | } |
494 | if (!L->getExitingBlock()) { |
495 | DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loop doesn't have unique exit block\n" ; } } while (false); |
496 | return false; |
497 | } |
498 | } |
499 | return true; |
500 | } |
501 | |
502 | unsigned selectLoopForInterchange(const LoopVector &LoopList) { |
503 | // TODO: Add a better heuristic to select the loop to be interchanged based |
504 | // on the dependence matrix. Currently we select the innermost loop. |
505 | return LoopList.size() - 1; |
506 | } |
507 | |
508 | bool processLoopList(LoopVector LoopList, Function &F) { |
509 | bool Changed = false; |
510 | unsigned LoopNestDepth = LoopList.size(); |
511 | if (LoopNestDepth < 2) { |
512 | DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loop doesn't contain minimum nesting level.\n" ; } } while (false); |
513 | return false; |
514 | } |
515 | if (LoopNestDepth > MaxLoopNestDepth) { |
516 | DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than " << MaxLoopNestDepth << "\n"; } } while (false) |
517 | << MaxLoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Cannot handle loops of depth greater than " << MaxLoopNestDepth << "\n"; } } while (false); |
518 | return false; |
519 | } |
520 | if (!isComputableLoopNest(LoopList)) { |
521 | DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Not valid loop candidate for interchange\n" ; } } while (false); |
522 | return false; |
523 | } |
524 | |
525 | DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Processing LoopList of size = " << LoopNestDepth << "\n"; } } while (false); |
526 | |
527 | CharMatrix DependencyMatrix; |
528 | Loop *OuterMostLoop = *(LoopList.begin()); |
529 | if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth, |
530 | OuterMostLoop, DI)) { |
531 | DEBUG(dbgs() << "Populating dependency matrix failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Populating dependency matrix failed\n" ; } } while (false); |
532 | return false; |
533 | } |
534 | #ifdef DUMP_DEP_MATRICIES |
535 | DEBUG(dbgs() << "Dependence before interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Dependence before interchange\n" ; } } while (false); |
536 | printDepMatrix(DependencyMatrix); |
537 | #endif |
538 | |
539 | BasicBlock *OuterMostLoopLatch = OuterMostLoop->getLoopLatch(); |
540 | BranchInst *OuterMostLoopLatchBI = |
541 | dyn_cast<BranchInst>(OuterMostLoopLatch->getTerminator()); |
542 | if (!OuterMostLoopLatchBI || OuterMostLoopLatchBI->getNumSuccessors() != 2) |
543 | return false; |
544 | |
545 | // Since we currently do not handle LCSSA PHI's any failure in loop |
546 | // condition will now branch to LoopNestExit. |
547 | // TODO: This should be removed once we handle LCSSA PHI nodes. |
548 | |
549 | // Get the Outermost loop exit. |
550 | BasicBlock *LoopNestExit; |
551 | if (OuterMostLoopLatchBI->getSuccessor(0) == OuterMostLoop->getHeader()) |
552 | LoopNestExit = OuterMostLoopLatchBI->getSuccessor(1); |
553 | else |
554 | LoopNestExit = OuterMostLoopLatchBI->getSuccessor(0); |
555 | |
556 | if (isa<PHINode>(LoopNestExit->begin())) { |
557 | DEBUG(dbgs() << "PHI Nodes in loop nest exit is not handled for now "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "PHI Nodes in loop nest exit is not handled for now " "since on failure all loops branch to loop nest exit.\n"; } } while (false) |
558 | "since on failure all loops branch to loop nest exit.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "PHI Nodes in loop nest exit is not handled for now " "since on failure all loops branch to loop nest exit.\n"; } } while (false); |
559 | return false; |
560 | } |
561 | |
562 | unsigned SelecLoopId = selectLoopForInterchange(LoopList); |
563 | // Move the selected loop outwards to the best possible position. |
564 | for (unsigned i = SelecLoopId; i > 0; i--) { |
565 | bool Interchanged = |
566 | processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix); |
567 | if (!Interchanged) |
568 | return Changed; |
569 | // Loops interchanged reflect the same in LoopList |
570 | std::swap(LoopList[i - 1], LoopList[i]); |
571 | |
572 | // Update the DependencyMatrix |
573 | interChangeDependencies(DependencyMatrix, i, i - 1); |
574 | #ifdef DUMP_DEP_MATRICIES |
575 | DEBUG(dbgs() << "Dependence after interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Dependence after interchange\n" ; } } while (false); |
576 | printDepMatrix(DependencyMatrix); |
577 | #endif |
578 | Changed |= Interchanged; |
579 | } |
580 | return Changed; |
581 | } |
582 | |
583 | bool processLoop(LoopVector LoopList, unsigned InnerLoopId, |
584 | unsigned OuterLoopId, BasicBlock *LoopNestExit, |
585 | std::vector<std::vector<char>> &DependencyMatrix) { |
586 | DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Processing Inner Loop Id = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"; } } while (false) |
587 | << " and OuterLoopId = " << OuterLoopId << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Processing Inner Loop Id = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"; } } while (false); |
588 | Loop *InnerLoop = LoopList[InnerLoopId]; |
589 | Loop *OuterLoop = LoopList[OuterLoopId]; |
590 | |
591 | LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT, |
592 | PreserveLCSSA, ORE); |
593 | if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { |
594 | DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Not interchanging Loops. Cannot prove legality\n" ; } } while (false); |
595 | return false; |
596 | } |
597 | DEBUG(dbgs() << "Loops are legal to interchange\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops are legal to interchange\n" ; } } while (false); |
598 | LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); |
599 | if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { |
600 | DEBUG(dbgs() << "Interchanging loops not profitable\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Interchanging loops not profitable\n" ; } } while (false); |
601 | return false; |
602 | } |
603 | |
604 | ORE->emit([&]() { |
605 | return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged", |
606 | InnerLoop->getStartLoc(), |
607 | InnerLoop->getHeader()) |
608 | << "Loop interchanged with enclosing loop."; |
609 | }); |
610 | |
611 | LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, |
612 | LoopNestExit, LIL.hasInnerLoopReduction()); |
613 | LIT.transform(); |
614 | DEBUG(dbgs() << "Loops interchanged\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops interchanged\n" ; } } while (false); |
615 | return true; |
616 | } |
617 | }; |
618 | |
619 | } // end anonymous namespace |
620 | |
621 | bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) { |
622 | return llvm::none_of(Ins->users(), [=](User *U) -> bool { |
623 | auto *UserIns = dyn_cast<PHINode>(U); |
624 | RecurrenceDescriptor RD; |
625 | return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD); |
626 | }); |
627 | } |
628 | |
629 | bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader( |
630 | BasicBlock *BB) { |
631 | for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { |
632 | // Load corresponding to reduction PHI's are safe while concluding if |
633 | // tightly nested. |
634 | if (LoadInst *L = dyn_cast<LoadInst>(I)) { |
635 | if (!areAllUsesReductions(L, InnerLoop)) |
636 | return true; |
637 | } else if (I->mayHaveSideEffects() || I->mayReadFromMemory()) |
638 | return true; |
639 | } |
640 | return false; |
641 | } |
642 | |
643 | bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch( |
644 | BasicBlock *BB) { |
645 | for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { |
646 | // Stores corresponding to reductions are safe while concluding if tightly |
647 | // nested. |
648 | if (StoreInst *L = dyn_cast<StoreInst>(I)) { |
649 | if (!isa<PHINode>(L->getOperand(0))) |
650 | return true; |
651 | } else if (I->mayHaveSideEffects() || I->mayReadFromMemory()) |
652 | return true; |
653 | } |
654 | return false; |
655 | } |
656 | |
657 | bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { |
658 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); |
659 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
660 | BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); |
661 | |
662 | DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Checking if loops are tightly nested\n" ; } } while (false); |
663 | |
664 | // A perfectly nested loop will not have any branch in between the outer and |
665 | // inner block i.e. outer header will branch to either inner preheader and |
666 | // outerloop latch. |
667 | BranchInst *OuterLoopHeaderBI = |
668 | dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); |
669 | if (!OuterLoopHeaderBI) |
670 | return false; |
671 | |
672 | for (BasicBlock *Succ : OuterLoopHeaderBI->successors()) |
673 | if (Succ != InnerLoopPreHeader && Succ != OuterLoopLatch) |
674 | return false; |
675 | |
676 | DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Checking instructions in Loop header and Loop latch\n" ; } } while (false); |
677 | // We do not have any basic block in between now make sure the outer header |
678 | // and outer loop latch doesn't contain any unsafe instructions. |
679 | if (containsUnsafeInstructionsInHeader(OuterLoopHeader) || |
680 | containsUnsafeInstructionsInLatch(OuterLoopLatch)) |
681 | return false; |
682 | |
683 | DEBUG(dbgs() << "Loops are perfectly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops are perfectly nested\n" ; } } while (false); |
684 | // We have a perfect loop nest. |
685 | return true; |
686 | } |
687 | |
688 | bool LoopInterchangeLegality::isLoopStructureUnderstood( |
689 | PHINode *InnerInduction) { |
690 | unsigned Num = InnerInduction->getNumOperands(); |
691 | BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); |
692 | for (unsigned i = 0; i < Num; ++i) { |
693 | Value *Val = InnerInduction->getOperand(i); |
694 | if (isa<Constant>(Val)) |
695 | continue; |
696 | Instruction *I = dyn_cast<Instruction>(Val); |
697 | if (!I) |
698 | return false; |
699 | // TODO: Handle triangular loops. |
700 | // e.g. for(int i=0;i<N;i++) |
701 | // for(int j=i;j<N;j++) |
702 | unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i); |
703 | if (InnerInduction->getIncomingBlock(IncomBlockIndx) == |
704 | InnerLoopPreheader && |
705 | !OuterLoop->isLoopInvariant(I)) { |
706 | return false; |
707 | } |
708 | } |
709 | return true; |
710 | } |
711 | |
712 | bool LoopInterchangeLegality::findInductionAndReductions( |
713 | Loop *L, SmallVector<PHINode *, 8> &Inductions, |
714 | SmallVector<PHINode *, 8> &Reductions) { |
715 | if (!L->getLoopLatch() || !L->getLoopPredecessor()) |
716 | return false; |
717 | for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { |
718 | RecurrenceDescriptor RD; |
719 | InductionDescriptor ID; |
720 | PHINode *PHI = cast<PHINode>(I); |
721 | if (InductionDescriptor::isInductionPHI(PHI, L, SE, ID)) |
722 | Inductions.push_back(PHI); |
723 | else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) |
724 | Reductions.push_back(PHI); |
725 | else { |
726 | DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n" ; } } while (false) |
727 | dbgs() << "Failed to recognize PHI as an induction or reduction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed to recognize PHI as an induction or reduction.\n" ; } } while (false); |
728 | return false; |
729 | } |
730 | } |
731 | return true; |
732 | } |
733 | |
734 | static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) { |
735 | for (auto I = Block->begin(); isa<PHINode>(I); ++I) { |
736 | PHINode *PHI = cast<PHINode>(I); |
737 | // Reduction lcssa phi will have only 1 incoming block that from loop latch. |
738 | if (PHI->getNumIncomingValues() > 1) |
739 | return false; |
740 | Instruction *Ins = dyn_cast<Instruction>(PHI->getIncomingValue(0)); |
741 | if (!Ins) |
742 | return false; |
743 | // Incoming value for lcssa phi's in outer loop exit can only be inner loop |
744 | // exits lcssa phi else it would not be tightly nested. |
745 | if (!isa<PHINode>(Ins) && isOuterLoopExitBlock) |
746 | return false; |
747 | } |
748 | return true; |
749 | } |
750 | |
751 | static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock, |
752 | BasicBlock *LoopHeader) { |
753 | if (BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator())) { |
754 | assert(BI->getNumSuccessors() == 2 &&(static_cast <bool> (BI->getNumSuccessors() == 2 && "Branch leaving loop latch must have 2 successors") ? void ( 0) : __assert_fail ("BI->getNumSuccessors() == 2 && \"Branch leaving loop latch must have 2 successors\"" , "/build/llvm-toolchain-snapshot-7~svn326551/lib/Transforms/Scalar/LoopInterchange.cpp" , 755, __extension__ __PRETTY_FUNCTION__)) |
755 | "Branch leaving loop latch must have 2 successors")(static_cast <bool> (BI->getNumSuccessors() == 2 && "Branch leaving loop latch must have 2 successors") ? void ( 0) : __assert_fail ("BI->getNumSuccessors() == 2 && \"Branch leaving loop latch must have 2 successors\"" , "/build/llvm-toolchain-snapshot-7~svn326551/lib/Transforms/Scalar/LoopInterchange.cpp" , 755, __extension__ __PRETTY_FUNCTION__)); |
756 | for (BasicBlock *Succ : BI->successors()) { |
757 | if (Succ == LoopHeader) |
758 | continue; |
759 | return Succ; |
760 | } |
761 | } |
762 | return nullptr; |
763 | } |
764 | |
765 | // This function indicates the current limitations in the transform as a result |
766 | // of which we do not proceed. |
767 | bool LoopInterchangeLegality::currentLimitations() { |
768 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
769 | BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); |
770 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); |
771 | BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); |
772 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); |
773 | |
774 | PHINode *InnerInductionVar; |
775 | SmallVector<PHINode *, 8> Inductions; |
776 | SmallVector<PHINode *, 8> Reductions; |
777 | if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) { |
778 | DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes " << "are supported currently.\n"; } } while (false) |
779 | << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Only inner loops with induction or reduction PHI nodes " << "are supported currently.\n"; } } while (false); |
780 | ORE->emit([&]() { |
781 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner", |
782 | InnerLoop->getStartLoc(), |
783 | InnerLoop->getHeader()) |
784 | << "Only inner loops with induction or reduction PHI nodes can be" |
785 | " interchange currently."; |
786 | }); |
787 | return true; |
788 | } |
789 | |
790 | // TODO: Currently we handle only loops with 1 induction variable. |
791 | if (Inductions.size() != 1) { |
792 | DEBUG(dbgs() << "We currently only support loops with 1 induction variable."do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "We currently only support loops with 1 induction variable." << "Failed to interchange due to current limitation\n" ; } } while (false) |
793 | << "Failed to interchange due to current limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "We currently only support loops with 1 induction variable." << "Failed to interchange due to current limitation\n" ; } } while (false); |
794 | ORE->emit([&]() { |
795 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "MultiInductionInner", |
796 | InnerLoop->getStartLoc(), |
797 | InnerLoop->getHeader()) |
798 | << "Only inner loops with 1 induction variable can be " |
799 | "interchanged currently."; |
800 | }); |
801 | return true; |
802 | } |
803 | if (Reductions.size() > 0) |
804 | InnerLoopHasReduction = true; |
805 | |
806 | InnerInductionVar = Inductions.pop_back_val(); |
807 | Reductions.clear(); |
808 | if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) { |
809 | DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes " << "are supported currently.\n"; } } while (false) |
810 | << "are supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Only outer loops with induction or reduction PHI nodes " << "are supported currently.\n"; } } while (false); |
811 | ORE->emit([&]() { |
812 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter", |
813 | OuterLoop->getStartLoc(), |
814 | OuterLoop->getHeader()) |
815 | << "Only outer loops with induction or reduction PHI nodes can be" |
816 | " interchanged currently."; |
817 | }); |
818 | return true; |
819 | } |
820 | |
821 | // Outer loop cannot have reduction because then loops will not be tightly |
822 | // nested. |
823 | if (!Reductions.empty()) { |
824 | DEBUG(dbgs() << "Outer loops with reductions are not supported "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Outer loops with reductions are not supported " << "currently.\n"; } } while (false) |
825 | << "currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Outer loops with reductions are not supported " << "currently.\n"; } } while (false); |
826 | ORE->emit([&]() { |
827 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ReductionsOuter", |
828 | OuterLoop->getStartLoc(), |
829 | OuterLoop->getHeader()) |
830 | << "Outer loops with reductions cannot be interchangeed " |
831 | "currently."; |
832 | }); |
833 | return true; |
834 | } |
835 | // TODO: Currently we handle only loops with 1 induction variable. |
836 | if (Inductions.size() != 1) { |
837 | DEBUG(dbgs() << "Loops with more than 1 induction variables are not "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops with more than 1 induction variables are not " << "supported currently.\n"; } } while (false) |
838 | << "supported currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops with more than 1 induction variables are not " << "supported currently.\n"; } } while (false); |
839 | ORE->emit([&]() { |
840 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "MultiIndutionOuter", |
841 | OuterLoop->getStartLoc(), |
842 | OuterLoop->getHeader()) |
843 | << "Only outer loops with 1 induction variable can be " |
844 | "interchanged currently."; |
845 | }); |
846 | return true; |
847 | } |
848 | |
849 | // TODO: Triangular loops are not handled for now. |
850 | if (!isLoopStructureUnderstood(InnerInductionVar)) { |
851 | DEBUG(dbgs() << "Loop structure not understood by pass\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loop structure not understood by pass\n" ; } } while (false); |
852 | ORE->emit([&]() { |
853 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner", |
854 | InnerLoop->getStartLoc(), |
855 | InnerLoop->getHeader()) |
856 | << "Inner loop structure not understood currently."; |
857 | }); |
858 | return true; |
859 | } |
860 | |
861 | // TODO: We only handle LCSSA PHI's corresponding to reduction for now. |
862 | BasicBlock *LoopExitBlock = |
863 | getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader); |
864 | if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) { |
865 | DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n" ; } } while (false); |
866 | ORE->emit([&]() { |
867 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoLCSSAPHIOuter", |
868 | OuterLoop->getStartLoc(), |
869 | OuterLoop->getHeader()) |
870 | << "Only outer loops with LCSSA PHIs can be interchange " |
871 | "currently."; |
872 | }); |
873 | return true; |
874 | } |
875 | |
876 | LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader); |
877 | if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) { |
878 | DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n" ; } } while (false); |
879 | ORE->emit([&]() { |
880 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoLCSSAPHIOuterInner", |
881 | InnerLoop->getStartLoc(), |
882 | InnerLoop->getHeader()) |
883 | << "Only inner loops with LCSSA PHIs can be interchange " |
884 | "currently."; |
885 | }); |
886 | return true; |
887 | } |
888 | |
889 | // TODO: Current limitation: Since we split the inner loop latch at the point |
890 | // were induction variable is incremented (induction.next); We cannot have |
891 | // more than 1 user of induction.next since it would result in broken code |
892 | // after split. |
893 | // e.g. |
894 | // for(i=0;i<N;i++) { |
895 | // for(j = 0;j<M;j++) { |
896 | // A[j+1][i+2] = A[j][i]+k; |
897 | // } |
898 | // } |
899 | Instruction *InnerIndexVarInc = nullptr; |
900 | if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader) |
901 | InnerIndexVarInc = |
902 | dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1)); |
903 | else |
904 | InnerIndexVarInc = |
905 | dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); |
906 | |
907 | if (!InnerIndexVarInc) { |
908 | DEBUG(dbgs() << "Did not find an instruction to increment the induction "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Did not find an instruction to increment the induction " << "variable.\n"; } } while (false) |
909 | << "variable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Did not find an instruction to increment the induction " << "variable.\n"; } } while (false); |
910 | ORE->emit([&]() { |
911 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoIncrementInInner", |
912 | InnerLoop->getStartLoc(), |
913 | InnerLoop->getHeader()) |
914 | << "The inner loop does not increment the induction variable."; |
915 | }); |
916 | return true; |
917 | } |
918 | |
919 | // Since we split the inner loop latch on this induction variable. Make sure |
920 | // we do not have any instruction between the induction variable and branch |
921 | // instruction. |
922 | |
923 | bool FoundInduction = false; |
924 | for (const Instruction &I : llvm::reverse(*InnerLoopLatch)) { |
925 | if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) || |
926 | isa<ZExtInst>(I)) |
927 | continue; |
928 | |
929 | // We found an instruction. If this is not induction variable then it is not |
930 | // safe to split this loop latch. |
931 | if (!I.isIdenticalTo(InnerIndexVarInc)) { |
932 | DEBUG(dbgs() << "Found unsupported instructions between induction "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Found unsupported instructions between induction " << "variable increment and branch.\n"; } } while (false ) |
933 | << "variable increment and branch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Found unsupported instructions between induction " << "variable increment and branch.\n"; } } while (false ); |
934 | ORE->emit([&]() { |
935 | return OptimizationRemarkMissed( |
936 | DEBUG_TYPE"loop-interchange", "UnsupportedInsBetweenInduction", |
937 | InnerLoop->getStartLoc(), InnerLoop->getHeader()) |
938 | << "Found unsupported instruction between induction variable " |
939 | "increment and branch."; |
940 | }); |
941 | return true; |
942 | } |
943 | |
944 | FoundInduction = true; |
945 | break; |
946 | } |
947 | // The loop latch ended and we didn't find the induction variable return as |
948 | // current limitation. |
949 | if (!FoundInduction) { |
950 | DEBUG(dbgs() << "Did not find the induction variable.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Did not find the induction variable.\n" ; } } while (false); |
951 | ORE->emit([&]() { |
952 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoIndutionVariable", |
953 | InnerLoop->getStartLoc(), |
954 | InnerLoop->getHeader()) |
955 | << "Did not find the induction variable."; |
956 | }); |
957 | return true; |
958 | } |
959 | return false; |
960 | } |
961 | |
962 | bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, |
963 | unsigned OuterLoopId, |
964 | CharMatrix &DepMatrix) { |
965 | if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { |
966 | DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << " due to dependence\n"; } } while (false ) |
967 | << " and OuterLoopId = " << OuterLoopIddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << " due to dependence\n"; } } while (false ) |
968 | << " due to dependence\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << " due to dependence\n"; } } while (false ); |
969 | ORE->emit([&]() { |
970 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence", |
971 | InnerLoop->getStartLoc(), |
972 | InnerLoop->getHeader()) |
973 | << "Cannot interchange loops due to dependences."; |
974 | }); |
975 | return false; |
976 | } |
977 | |
978 | // Check if outer and inner loop contain legal instructions only. |
979 | for (auto *BB : OuterLoop->blocks()) |
980 | for (Instruction &I : *BB) |
981 | if (CallInst *CI = dyn_cast<CallInst>(&I)) { |
982 | // readnone functions do not prevent interchanging. |
983 | if (CI->doesNotReadMemory()) |
984 | continue; |
985 | DEBUG(dbgs() << "Loops with call instructions cannot be interchanged "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged " << "safely."; } } while (false) |
986 | << "safely.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops with call instructions cannot be interchanged " << "safely."; } } while (false); |
987 | return false; |
988 | } |
989 | |
990 | // Create unique Preheaders if we already do not have one. |
991 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); |
992 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
993 | |
994 | // Create a unique outer preheader - |
995 | // 1) If OuterLoop preheader is not present. |
996 | // 2) If OuterLoop Preheader is same as OuterLoop Header |
997 | // 3) If OuterLoop Preheader is same as Header of the previous loop. |
998 | // 4) If OuterLoop Preheader is Entry node. |
999 | if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() || |
1000 | isa<PHINode>(OuterLoopPreHeader->begin()) || |
1001 | !OuterLoopPreHeader->getUniquePredecessor()) { |
1002 | OuterLoopPreHeader = |
Value stored to 'OuterLoopPreHeader' is never read | |
1003 | InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA); |
1004 | } |
1005 | |
1006 | if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() || |
1007 | InnerLoopPreHeader == OuterLoop->getHeader()) { |
1008 | InnerLoopPreHeader = |
1009 | InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA); |
1010 | } |
1011 | |
1012 | // TODO: The loops could not be interchanged due to current limitations in the |
1013 | // transform module. |
1014 | if (currentLimitations()) { |
1015 | DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Not legal because of current transform limitation\n" ; } } while (false); |
1016 | return false; |
1017 | } |
1018 | |
1019 | // Check if the loops are tightly nested. |
1020 | if (!tightlyNested(OuterLoop, InnerLoop)) { |
1021 | DEBUG(dbgs() << "Loops not tightly nested\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Loops not tightly nested\n" ; } } while (false); |
1022 | ORE->emit([&]() { |
1023 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested", |
1024 | InnerLoop->getStartLoc(), |
1025 | InnerLoop->getHeader()) |
1026 | << "Cannot interchange loops because they are not tightly " |
1027 | "nested."; |
1028 | }); |
1029 | return false; |
1030 | } |
1031 | |
1032 | return true; |
1033 | } |
1034 | |
1035 | int LoopInterchangeProfitability::getInstrOrderCost() { |
1036 | unsigned GoodOrder, BadOrder; |
1037 | BadOrder = GoodOrder = 0; |
1038 | for (BasicBlock *BB : InnerLoop->blocks()) { |
1039 | for (Instruction &Ins : *BB) { |
1040 | if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) { |
1041 | unsigned NumOp = GEP->getNumOperands(); |
1042 | bool FoundInnerInduction = false; |
1043 | bool FoundOuterInduction = false; |
1044 | for (unsigned i = 0; i < NumOp; ++i) { |
1045 | const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i)); |
1046 | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal); |
1047 | if (!AR) |
1048 | continue; |
1049 | |
1050 | // If we find the inner induction after an outer induction e.g. |
1051 | // for(int i=0;i<N;i++) |
1052 | // for(int j=0;j<N;j++) |
1053 | // A[i][j] = A[i-1][j-1]+k; |
1054 | // then it is a good order. |
1055 | if (AR->getLoop() == InnerLoop) { |
1056 | // We found an InnerLoop induction after OuterLoop induction. It is |
1057 | // a good order. |
1058 | FoundInnerInduction = true; |
1059 | if (FoundOuterInduction) { |
1060 | GoodOrder++; |
1061 | break; |
1062 | } |
1063 | } |
1064 | // If we find the outer induction after an inner induction e.g. |
1065 | // for(int i=0;i<N;i++) |
1066 | // for(int j=0;j<N;j++) |
1067 | // A[j][i] = A[j-1][i-1]+k; |
1068 | // then it is a bad order. |
1069 | if (AR->getLoop() == OuterLoop) { |
1070 | // We found an OuterLoop induction after InnerLoop induction. It is |
1071 | // a bad order. |
1072 | FoundOuterInduction = true; |
1073 | if (FoundInnerInduction) { |
1074 | BadOrder++; |
1075 | break; |
1076 | } |
1077 | } |
1078 | } |
1079 | } |
1080 | } |
1081 | } |
1082 | return GoodOrder - BadOrder; |
1083 | } |
1084 | |
1085 | static bool isProfitableForVectorization(unsigned InnerLoopId, |
1086 | unsigned OuterLoopId, |
1087 | CharMatrix &DepMatrix) { |
1088 | // TODO: Improve this heuristic to catch more cases. |
1089 | // If the inner loop is loop independent or doesn't carry any dependency it is |
1090 | // profitable to move this to outer position. |
1091 | for (auto &Row : DepMatrix) { |
1092 | if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I') |
1093 | return false; |
1094 | // TODO: We need to improve this heuristic. |
1095 | if (Row[OuterLoopId] != '=') |
1096 | return false; |
1097 | } |
1098 | // If outer loop has dependence and inner loop is loop independent then it is |
1099 | // profitable to interchange to enable parallelism. |
1100 | return true; |
1101 | } |
1102 | |
1103 | bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, |
1104 | unsigned OuterLoopId, |
1105 | CharMatrix &DepMatrix) { |
1106 | // TODO: Add better profitability checks. |
1107 | // e.g |
1108 | // 1) Construct dependency matrix and move the one with no loop carried dep |
1109 | // inside to enable vectorization. |
1110 | |
1111 | // This is rough cost estimation algorithm. It counts the good and bad order |
1112 | // of induction variables in the instruction and allows reordering if number |
1113 | // of bad orders is more than good. |
1114 | int Cost = getInstrOrderCost(); |
1115 | DEBUG(dbgs() << "Cost = " << Cost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Cost = " << Cost << "\n"; } } while (false); |
1116 | if (Cost < -LoopInterchangeCostThreshold) |
1117 | return true; |
1118 | |
1119 | // It is not profitable as per current cache profitability model. But check if |
1120 | // we can move this loop outside to improve parallelism. |
1121 | if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) |
1122 | return true; |
1123 | |
1124 | ORE->emit([&]() { |
1125 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable", |
1126 | InnerLoop->getStartLoc(), |
1127 | InnerLoop->getHeader()) |
1128 | << "Interchanging loops is too costly (cost=" |
1129 | << ore::NV("Cost", Cost) << ", threshold=" |
1130 | << ore::NV("Threshold", LoopInterchangeCostThreshold) |
1131 | << ") and it does not improve parallelism."; |
1132 | }); |
1133 | return false; |
1134 | } |
1135 | |
1136 | void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, |
1137 | Loop *InnerLoop) { |
1138 | for (Loop::iterator I = OuterLoop->begin(), E = OuterLoop->end(); I != E; |
1139 | ++I) { |
1140 | if (*I == InnerLoop) { |
1141 | OuterLoop->removeChildLoop(I); |
1142 | return; |
1143 | } |
1144 | } |
1145 | llvm_unreachable("Couldn't find loop")::llvm::llvm_unreachable_internal("Couldn't find loop", "/build/llvm-toolchain-snapshot-7~svn326551/lib/Transforms/Scalar/LoopInterchange.cpp" , 1145); |
1146 | } |
1147 | |
1148 | void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop, |
1149 | Loop *OuterLoop) { |
1150 | Loop *OuterLoopParent = OuterLoop->getParentLoop(); |
1151 | if (OuterLoopParent) { |
1152 | // Remove the loop from its parent loop. |
1153 | removeChildLoop(OuterLoopParent, OuterLoop); |
1154 | removeChildLoop(OuterLoop, InnerLoop); |
1155 | OuterLoopParent->addChildLoop(InnerLoop); |
1156 | } else { |
1157 | removeChildLoop(OuterLoop, InnerLoop); |
1158 | LI->changeTopLevelLoop(OuterLoop, InnerLoop); |
1159 | } |
1160 | |
1161 | while (!InnerLoop->empty()) |
1162 | OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin())); |
1163 | |
1164 | InnerLoop->addChildLoop(OuterLoop); |
1165 | } |
1166 | |
1167 | bool LoopInterchangeTransform::transform() { |
1168 | bool Transformed = false; |
1169 | Instruction *InnerIndexVar; |
1170 | |
1171 | if (InnerLoop->getSubLoops().empty()) { |
1172 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1173 | DEBUG(dbgs() << "Calling Split Inner Loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Calling Split Inner Loop\n" ; } } while (false); |
1174 | PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); |
1175 | if (!InductionPHI) { |
1176 | DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Failed to find the point to split loop latch \n" ; } } while (false); |
1177 | return false; |
1178 | } |
1179 | |
1180 | if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) |
1181 | InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1)); |
1182 | else |
1183 | InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0)); |
1184 | |
1185 | // Ensure that InductionPHI is the first Phi node as required by |
1186 | // splitInnerLoopHeader |
1187 | if (&InductionPHI->getParent()->front() != InductionPHI) |
1188 | InductionPHI->moveBefore(&InductionPHI->getParent()->front()); |
1189 | |
1190 | // Split at the place were the induction variable is |
1191 | // incremented/decremented. |
1192 | // TODO: This splitting logic may not work always. Fix this. |
1193 | splitInnerLoopLatch(InnerIndexVar); |
1194 | DEBUG(dbgs() << "splitInnerLoopLatch done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "splitInnerLoopLatch done\n" ; } } while (false); |
1195 | |
1196 | // Splits the inner loops phi nodes out into a separate basic block. |
1197 | splitInnerLoopHeader(); |
1198 | DEBUG(dbgs() << "splitInnerLoopHeader done\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "splitInnerLoopHeader done\n" ; } } while (false); |
1199 | } |
1200 | |
1201 | Transformed |= adjustLoopLinks(); |
1202 | if (!Transformed) { |
1203 | DEBUG(dbgs() << "adjustLoopLinks failed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "adjustLoopLinks failed\n" ; } } while (false); |
1204 | return false; |
1205 | } |
1206 | |
1207 | restructureLoops(InnerLoop, OuterLoop); |
1208 | return true; |
1209 | } |
1210 | |
1211 | void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { |
1212 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); |
1213 | BasicBlock *InnerLoopLatchPred = InnerLoopLatch; |
1214 | InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI); |
1215 | } |
1216 | |
1217 | void LoopInterchangeTransform::splitInnerLoopHeader() { |
1218 | // Split the inner loop header out. Here make sure that the reduction PHI's |
1219 | // stay in the innerloop body. |
1220 | BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); |
1221 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1222 | SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); |
1223 | if (InnerLoopHasReduction) { |
1224 | // Adjust Reduction PHI's in the block. The induction PHI must be the first |
1225 | // PHI in InnerLoopHeader for this to work. |
1226 | SmallVector<PHINode *, 8> PHIVec; |
1227 | for (auto I = std::next(InnerLoopHeader->begin()); isa<PHINode>(I); ++I) { |
1228 | PHINode *PHI = dyn_cast<PHINode>(I); |
1229 | Value *V = PHI->getIncomingValueForBlock(InnerLoopPreHeader); |
1230 | PHI->replaceAllUsesWith(V); |
1231 | PHIVec.push_back((PHI)); |
1232 | } |
1233 | for (PHINode *P : PHIVec) { |
1234 | P->eraseFromParent(); |
1235 | } |
1236 | } |
1237 | |
1238 | DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & " "InnerLoopHeader\n"; } } while (false) |
1239 | "InnerLoopHeader\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & " "InnerLoopHeader\n"; } } while (false); |
1240 | } |
1241 | |
1242 | /// \brief Move all instructions except the terminator from FromBB right before |
1243 | /// InsertBefore |
1244 | static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { |
1245 | auto &ToList = InsertBefore->getParent()->getInstList(); |
1246 | auto &FromList = FromBB->getInstList(); |
1247 | |
1248 | ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(), |
1249 | FromBB->getTerminator()->getIterator()); |
1250 | } |
1251 | |
1252 | void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock, |
1253 | BasicBlock *OldPred, |
1254 | BasicBlock *NewPred) { |
1255 | for (auto I = CurrBlock->begin(); isa<PHINode>(I); ++I) { |
1256 | PHINode *PHI = cast<PHINode>(I); |
1257 | unsigned Num = PHI->getNumIncomingValues(); |
1258 | for (unsigned i = 0; i < Num; ++i) { |
1259 | if (PHI->getIncomingBlock(i) == OldPred) |
1260 | PHI->setIncomingBlock(i, NewPred); |
1261 | } |
1262 | } |
1263 | } |
1264 | |
1265 | /// \brief Update BI to jump to NewBB instead of OldBB. Records updates to |
1266 | /// the dominator tree in DTUpdates, if DT should be preserved. |
1267 | static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, |
1268 | BasicBlock *NewBB, |
1269 | std::vector<DominatorTree::UpdateType> &DTUpdates) { |
1270 | assert(llvm::count_if(BI->successors(),(static_cast <bool> (llvm::count_if(BI->successors() , [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && "BI must jump to OldBB at most once.") ? void (0) : __assert_fail ("llvm::count_if(BI->successors(), [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && \"BI must jump to OldBB at most once.\"" , "/build/llvm-toolchain-snapshot-7~svn326551/lib/Transforms/Scalar/LoopInterchange.cpp" , 1272, __extension__ __PRETTY_FUNCTION__)) |
1271 | [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 &&(static_cast <bool> (llvm::count_if(BI->successors() , [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && "BI must jump to OldBB at most once.") ? void (0) : __assert_fail ("llvm::count_if(BI->successors(), [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && \"BI must jump to OldBB at most once.\"" , "/build/llvm-toolchain-snapshot-7~svn326551/lib/Transforms/Scalar/LoopInterchange.cpp" , 1272, __extension__ __PRETTY_FUNCTION__)) |
1272 | "BI must jump to OldBB at most once.")(static_cast <bool> (llvm::count_if(BI->successors() , [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && "BI must jump to OldBB at most once.") ? void (0) : __assert_fail ("llvm::count_if(BI->successors(), [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && \"BI must jump to OldBB at most once.\"" , "/build/llvm-toolchain-snapshot-7~svn326551/lib/Transforms/Scalar/LoopInterchange.cpp" , 1272, __extension__ __PRETTY_FUNCTION__)); |
1273 | for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) { |
1274 | if (BI->getSuccessor(i) == OldBB) { |
1275 | BI->setSuccessor(i, NewBB); |
1276 | |
1277 | DTUpdates.push_back( |
1278 | {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB}); |
1279 | DTUpdates.push_back( |
1280 | {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB}); |
1281 | break; |
1282 | } |
1283 | } |
1284 | } |
1285 | |
1286 | bool LoopInterchangeTransform::adjustLoopBranches() { |
1287 | DEBUG(dbgs() << "adjustLoopBranches called\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-interchange")) { dbgs() << "adjustLoopBranches called\n" ; } } while (false); |
1288 | std::vector<DominatorTree::UpdateType> DTUpdates; |
1289 | |
1290 | // Adjust the loop preheader |
1291 | BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); |
1292 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); |
1293 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); |
1294 | BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); |
1295 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); |
1296 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1297 | BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); |
1298 | BasicBlock *InnerLoopLatchPredecessor = |
1299 | InnerLoopLatch->getUniquePredecessor(); |
1300 | BasicBlock *InnerLoopLatchSuccessor; |
1301 | BasicBlock *OuterLoopLatchSuccessor; |
1302 | |
1303 | BranchInst *OuterLoopLatchBI = |
1304 | dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); |
1305 | BranchInst *InnerLoopLatchBI = |
1306 | dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); |
1307 | BranchInst *OuterLoopHeaderBI = |
1308 | dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); |
1309 | BranchInst *InnerLoopHeaderBI = |
1310 | dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); |
1311 | |
1312 | if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || |
1313 | !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || |
1314 | !InnerLoopHeaderBI) |
1315 | return false; |
1316 | |
1317 | BranchInst *InnerLoopLatchPredecessorBI = |
1318 | dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); |
1319 | BranchInst *OuterLoopPredecessorBI = |
1320 | dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); |
1321 | |
1322 | if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) |
1323 | return false; |
1324 | BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); |
1325 | if (!InnerLoopHeaderSuccessor) |
1326 | return false; |
1327 | |
1328 | // Adjust Loop Preheader and headers |
1329 | updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader, |
1330 | InnerLoopPreHeader, DTUpdates); |
1331 | updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates); |
1332 | updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, |
1333 | InnerLoopHeaderSuccessor, DTUpdates); |
1334 | |
1335 | // Adjust reduction PHI's now that the incoming block has changed. |
1336 | updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, |
1337 | OuterLoopHeader); |
1338 | |
1339 | updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor, |
1340 | OuterLoopPreHeader, DTUpdates); |
1341 | |
1342 | // -------------Adjust loop latches----------- |
1343 | if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) |
1344 | InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); |
1345 | else |
1346 | InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); |
1347 | |
1348 | updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch, |
1349 | InnerLoopLatchSuccessor, DTUpdates); |
1350 | |
1351 | // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with |
1352 | // the value and remove this PHI node from inner loop. |
1353 | SmallVector<PHINode *, 8> LcssaVec; |
1354 | for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) { |
1355 | PHINode *LcssaPhi = cast<PHINode>(I); |
1356 | LcssaVec.push_back(LcssaPhi); |
1357 | } |
1358 | for (PHINode *P : LcssaVec) { |
1359 | Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch); |
1360 | P->replaceAllUsesWith(Incoming); |
1361 | P->eraseFromParent(); |
1362 | } |
1363 | |
1364 | if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) |
1365 | OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); |
1366 | else |
1367 | OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); |
1368 | |
1369 | updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor, |
1370 | OuterLoopLatchSuccessor, DTUpdates); |
1371 | updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch, |
1372 | DTUpdates); |
1373 | |
1374 | updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); |
1375 | |
1376 | DT->applyUpdates(DTUpdates); |
1377 | return true; |
1378 | } |
1379 | |
1380 | void LoopInterchangeTransform::adjustLoopPreheaders() { |
1381 | // We have interchanged the preheaders so we need to interchange the data in |
1382 | // the preheader as well. |
1383 | // This is because the content of inner preheader was previously executed |
1384 | // inside the outer loop. |
1385 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); |
1386 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1387 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); |
1388 | BranchInst *InnerTermBI = |
1389 | cast<BranchInst>(InnerLoopPreHeader->getTerminator()); |
1390 | |
1391 | // These instructions should now be executed inside the loop. |
1392 | // Move instruction into a new block after outer header. |
1393 | moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator()); |
1394 | // These instructions were not executed previously in the loop so move them to |
1395 | // the older inner loop preheader. |
1396 | moveBBContents(OuterLoopPreHeader, InnerTermBI); |
1397 | } |
1398 | |
1399 | bool LoopInterchangeTransform::adjustLoopLinks() { |
1400 | // Adjust all branches in the inner and outer loop. |
1401 | bool Changed = adjustLoopBranches(); |
1402 | if (Changed) |
1403 | adjustLoopPreheaders(); |
1404 | return Changed; |
1405 | } |
1406 | |
1407 | char LoopInterchange::ID = 0; |
1408 | |
1409 | INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",static void *initializeLoopInterchangePassOnce(PassRegistry & Registry) { |
1410 | "Interchanges loops for cache reuse", false, false)static void *initializeLoopInterchangePassOnce(PassRegistry & Registry) { |
1411 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry); |
1412 | INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)initializeDependenceAnalysisWrapperPassPass(Registry); |
1413 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry); |
1414 | INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry); |
1415 | INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry); |
1416 | INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry); |
1417 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry); |
1418 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)initializeOptimizationRemarkEmitterWrapperPassPass(Registry); |
1419 | |
1420 | INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse" , "loop-interchange", &LoopInterchange::ID, PassInfo::NormalCtor_t (callDefaultCtor<LoopInterchange>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeLoopInterchangePassFlag; void llvm::initializeLoopInterchangePass (PassRegistry &Registry) { llvm::call_once(InitializeLoopInterchangePassFlag , initializeLoopInterchangePassOnce, std::ref(Registry)); } |
1421 | "Interchanges loops for cache reuse", false, false)PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse" , "loop-interchange", &LoopInterchange::ID, PassInfo::NormalCtor_t (callDefaultCtor<LoopInterchange>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeLoopInterchangePassFlag; void llvm::initializeLoopInterchangePass (PassRegistry &Registry) { llvm::call_once(InitializeLoopInterchangePassFlag , initializeLoopInterchangePassOnce, std::ref(Registry)); } |
1422 | |
1423 | Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); } |