File: | lib/Transforms/Utils/LoopUnrollRuntime.cpp |
Warning: | line 601, column 26 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file implements some loop unrolling utilities for loops with run-time | |||
11 | // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time | |||
12 | // trip counts. | |||
13 | // | |||
14 | // The functions in this file are used to generate extra code when the | |||
15 | // run-time trip count modulo the unroll factor is not 0. When this is the | |||
16 | // case, we need to generate code to execute these 'left over' iterations. | |||
17 | // | |||
18 | // The current strategy generates an if-then-else sequence prior to the | |||
19 | // unrolled loop to execute the 'left over' iterations before or after the | |||
20 | // unrolled loop. | |||
21 | // | |||
22 | //===----------------------------------------------------------------------===// | |||
23 | ||||
24 | #include "llvm/ADT/SmallPtrSet.h" | |||
25 | #include "llvm/ADT/Statistic.h" | |||
26 | #include "llvm/Analysis/AliasAnalysis.h" | |||
27 | #include "llvm/Analysis/LoopIterator.h" | |||
28 | #include "llvm/Analysis/ScalarEvolution.h" | |||
29 | #include "llvm/Analysis/ScalarEvolutionExpander.h" | |||
30 | #include "llvm/IR/BasicBlock.h" | |||
31 | #include "llvm/IR/Dominators.h" | |||
32 | #include "llvm/IR/Metadata.h" | |||
33 | #include "llvm/IR/Module.h" | |||
34 | #include "llvm/Support/Debug.h" | |||
35 | #include "llvm/Support/raw_ostream.h" | |||
36 | #include "llvm/Transforms/Utils.h" | |||
37 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |||
38 | #include "llvm/Transforms/Utils/Cloning.h" | |||
39 | #include "llvm/Transforms/Utils/LoopUtils.h" | |||
40 | #include "llvm/Transforms/Utils/UnrollLoop.h" | |||
41 | #include <algorithm> | |||
42 | ||||
43 | using namespace llvm; | |||
44 | ||||
45 | #define DEBUG_TYPE"loop-unroll" "loop-unroll" | |||
46 | ||||
47 | STATISTIC(NumRuntimeUnrolled,static llvm::Statistic NumRuntimeUnrolled = {"loop-unroll", "NumRuntimeUnrolled" , "Number of loops unrolled with run-time trip counts", {0}, { false}} | |||
48 | "Number of loops unrolled with run-time trip counts")static llvm::Statistic NumRuntimeUnrolled = {"loop-unroll", "NumRuntimeUnrolled" , "Number of loops unrolled with run-time trip counts", {0}, { false}}; | |||
49 | static cl::opt<bool> UnrollRuntimeMultiExit( | |||
50 | "unroll-runtime-multi-exit", cl::init(false), cl::Hidden, | |||
51 | cl::desc("Allow runtime unrolling for loops with multiple exits, when " | |||
52 | "epilog is generated")); | |||
53 | ||||
54 | /// Connect the unrolling prolog code to the original loop. | |||
55 | /// The unrolling prolog code contains code to execute the | |||
56 | /// 'extra' iterations if the run-time trip count modulo the | |||
57 | /// unroll count is non-zero. | |||
58 | /// | |||
59 | /// This function performs the following: | |||
60 | /// - Create PHI nodes at prolog end block to combine values | |||
61 | /// that exit the prolog code and jump around the prolog. | |||
62 | /// - Add a PHI operand to a PHI node at the loop exit block | |||
63 | /// for values that exit the prolog and go around the loop. | |||
64 | /// - Branch around the original loop if the trip count is less | |||
65 | /// than the unroll factor. | |||
66 | /// | |||
67 | static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, | |||
68 | BasicBlock *PrologExit, | |||
69 | BasicBlock *OriginalLoopLatchExit, | |||
70 | BasicBlock *PreHeader, BasicBlock *NewPreHeader, | |||
71 | ValueToValueMapTy &VMap, DominatorTree *DT, | |||
72 | LoopInfo *LI, bool PreserveLCSSA) { | |||
73 | BasicBlock *Latch = L->getLoopLatch(); | |||
74 | assert(Latch && "Loop must have a latch")(static_cast <bool> (Latch && "Loop must have a latch" ) ? void (0) : __assert_fail ("Latch && \"Loop must have a latch\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 74, __extension__ __PRETTY_FUNCTION__)); | |||
75 | BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); | |||
76 | ||||
77 | // Create a PHI node for each outgoing value from the original loop | |||
78 | // (which means it is an outgoing value from the prolog code too). | |||
79 | // The new PHI node is inserted in the prolog end basic block. | |||
80 | // The new PHI node value is added as an operand of a PHI node in either | |||
81 | // the loop header or the loop exit block. | |||
82 | for (BasicBlock *Succ : successors(Latch)) { | |||
83 | for (PHINode &PN : Succ->phis()) { | |||
84 | // Add a new PHI node to the prolog end block and add the | |||
85 | // appropriate incoming values. | |||
86 | PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", | |||
87 | PrologExit->getFirstNonPHI()); | |||
88 | // Adding a value to the new PHI node from the original loop preheader. | |||
89 | // This is the value that skips all the prolog code. | |||
90 | if (L->contains(&PN)) { | |||
91 | NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), | |||
92 | PreHeader); | |||
93 | } else { | |||
94 | NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader); | |||
95 | } | |||
96 | ||||
97 | Value *V = PN.getIncomingValueForBlock(Latch); | |||
98 | if (Instruction *I = dyn_cast<Instruction>(V)) { | |||
99 | if (L->contains(I)) { | |||
100 | V = VMap.lookup(I); | |||
101 | } | |||
102 | } | |||
103 | // Adding a value to the new PHI node from the last prolog block | |||
104 | // that was created. | |||
105 | NewPN->addIncoming(V, PrologLatch); | |||
106 | ||||
107 | // Update the existing PHI node operand with the value from the | |||
108 | // new PHI node. How this is done depends on if the existing | |||
109 | // PHI node is in the original loop block, or the exit block. | |||
110 | if (L->contains(&PN)) { | |||
111 | PN.setIncomingValue(PN.getBasicBlockIndex(NewPreHeader), NewPN); | |||
112 | } else { | |||
113 | PN.addIncoming(NewPN, PrologExit); | |||
114 | } | |||
115 | } | |||
116 | } | |||
117 | ||||
118 | // Make sure that created prolog loop is in simplified form | |||
119 | SmallVector<BasicBlock *, 4> PrologExitPreds; | |||
120 | Loop *PrologLoop = LI->getLoopFor(PrologLatch); | |||
121 | if (PrologLoop) { | |||
122 | for (BasicBlock *PredBB : predecessors(PrologExit)) | |||
123 | if (PrologLoop->contains(PredBB)) | |||
124 | PrologExitPreds.push_back(PredBB); | |||
125 | ||||
126 | SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, | |||
127 | PreserveLCSSA); | |||
128 | } | |||
129 | ||||
130 | // Create a branch around the original loop, which is taken if there are no | |||
131 | // iterations remaining to be executed after running the prologue. | |||
132 | Instruction *InsertPt = PrologExit->getTerminator(); | |||
133 | IRBuilder<> B(InsertPt); | |||
134 | ||||
135 | assert(Count != 0 && "nonsensical Count!")(static_cast <bool> (Count != 0 && "nonsensical Count!" ) ? void (0) : __assert_fail ("Count != 0 && \"nonsensical Count!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 135, __extension__ __PRETTY_FUNCTION__)); | |||
136 | ||||
137 | // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1) | |||
138 | // This means %xtraiter is (BECount + 1) and all of the iterations of this | |||
139 | // loop were executed by the prologue. Note that if BECount <u (Count - 1) | |||
140 | // then (BECount + 1) cannot unsigned-overflow. | |||
141 | Value *BrLoopExit = | |||
142 | B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); | |||
143 | // Split the exit to maintain loop canonicalization guarantees | |||
144 | SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); | |||
145 | SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, | |||
146 | PreserveLCSSA); | |||
147 | // Add the branch to the exit block (around the unrolled loop) | |||
148 | B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); | |||
149 | InsertPt->eraseFromParent(); | |||
150 | if (DT) | |||
151 | DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); | |||
152 | } | |||
153 | ||||
154 | /// Connect the unrolling epilog code to the original loop. | |||
155 | /// The unrolling epilog code contains code to execute the | |||
156 | /// 'extra' iterations if the run-time trip count modulo the | |||
157 | /// unroll count is non-zero. | |||
158 | /// | |||
159 | /// This function performs the following: | |||
160 | /// - Update PHI nodes at the unrolling loop exit and epilog loop exit | |||
161 | /// - Create PHI nodes at the unrolling loop exit to combine | |||
162 | /// values that exit the unrolling loop code and jump around it. | |||
163 | /// - Update PHI operands in the epilog loop by the new PHI nodes | |||
164 | /// - Branch around the epilog loop if extra iters (ModVal) is zero. | |||
165 | /// | |||
166 | static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, | |||
167 | BasicBlock *Exit, BasicBlock *PreHeader, | |||
168 | BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, | |||
169 | ValueToValueMapTy &VMap, DominatorTree *DT, | |||
170 | LoopInfo *LI, bool PreserveLCSSA) { | |||
171 | BasicBlock *Latch = L->getLoopLatch(); | |||
172 | assert(Latch && "Loop must have a latch")(static_cast <bool> (Latch && "Loop must have a latch" ) ? void (0) : __assert_fail ("Latch && \"Loop must have a latch\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 172, __extension__ __PRETTY_FUNCTION__)); | |||
173 | BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); | |||
174 | ||||
175 | // Loop structure should be the following: | |||
176 | // | |||
177 | // PreHeader | |||
178 | // NewPreHeader | |||
179 | // Header | |||
180 | // ... | |||
181 | // Latch | |||
182 | // NewExit (PN) | |||
183 | // EpilogPreHeader | |||
184 | // EpilogHeader | |||
185 | // ... | |||
186 | // EpilogLatch | |||
187 | // Exit (EpilogPN) | |||
188 | ||||
189 | // Update PHI nodes at NewExit and Exit. | |||
190 | for (PHINode &PN : NewExit->phis()) { | |||
191 | // PN should be used in another PHI located in Exit block as | |||
192 | // Exit was split by SplitBlockPredecessors into Exit and NewExit | |||
193 | // Basicaly it should look like: | |||
194 | // NewExit: | |||
195 | // PN = PHI [I, Latch] | |||
196 | // ... | |||
197 | // Exit: | |||
198 | // EpilogPN = PHI [PN, EpilogPreHeader] | |||
199 | // | |||
200 | // There is EpilogPreHeader incoming block instead of NewExit as | |||
201 | // NewExit was spilt 1 more time to get EpilogPreHeader. | |||
202 | assert(PN.hasOneUse() && "The phi should have 1 use")(static_cast <bool> (PN.hasOneUse() && "The phi should have 1 use" ) ? void (0) : __assert_fail ("PN.hasOneUse() && \"The phi should have 1 use\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 202, __extension__ __PRETTY_FUNCTION__)); | |||
203 | PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser()); | |||
204 | assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block")(static_cast <bool> (EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block") ? void (0) : __assert_fail ("EpilogPN->getParent() == Exit && \"EpilogPN should be in Exit block\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 204, __extension__ __PRETTY_FUNCTION__)); | |||
205 | ||||
206 | // Add incoming PreHeader from branch around the Loop | |||
207 | PN.addIncoming(UndefValue::get(PN.getType()), PreHeader); | |||
208 | ||||
209 | Value *V = PN.getIncomingValueForBlock(Latch); | |||
210 | Instruction *I = dyn_cast<Instruction>(V); | |||
211 | if (I && L->contains(I)) | |||
212 | // If value comes from an instruction in the loop add VMap value. | |||
213 | V = VMap.lookup(I); | |||
214 | // For the instruction out of the loop, constant or undefined value | |||
215 | // insert value itself. | |||
216 | EpilogPN->addIncoming(V, EpilogLatch); | |||
217 | ||||
218 | assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&(static_cast <bool> (EpilogPN->getBasicBlockIndex(EpilogPreHeader ) >= 0 && "EpilogPN should have EpilogPreHeader incoming block" ) ? void (0) : __assert_fail ("EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 && \"EpilogPN should have EpilogPreHeader incoming block\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 219, __extension__ __PRETTY_FUNCTION__)) | |||
219 | "EpilogPN should have EpilogPreHeader incoming block")(static_cast <bool> (EpilogPN->getBasicBlockIndex(EpilogPreHeader ) >= 0 && "EpilogPN should have EpilogPreHeader incoming block" ) ? void (0) : __assert_fail ("EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 && \"EpilogPN should have EpilogPreHeader incoming block\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 219, __extension__ __PRETTY_FUNCTION__)); | |||
220 | // Change EpilogPreHeader incoming block to NewExit. | |||
221 | EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader), | |||
222 | NewExit); | |||
223 | // Now PHIs should look like: | |||
224 | // NewExit: | |||
225 | // PN = PHI [I, Latch], [undef, PreHeader] | |||
226 | // ... | |||
227 | // Exit: | |||
228 | // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch] | |||
229 | } | |||
230 | ||||
231 | // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader). | |||
232 | // Update corresponding PHI nodes in epilog loop. | |||
233 | for (BasicBlock *Succ : successors(Latch)) { | |||
234 | // Skip this as we already updated phis in exit blocks. | |||
235 | if (!L->contains(Succ)) | |||
236 | continue; | |||
237 | for (PHINode &PN : Succ->phis()) { | |||
238 | // Add new PHI nodes to the loop exit block and update epilog | |||
239 | // PHIs with the new PHI values. | |||
240 | PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", | |||
241 | NewExit->getFirstNonPHI()); | |||
242 | // Adding a value to the new PHI node from the unrolling loop preheader. | |||
243 | NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader); | |||
244 | // Adding a value to the new PHI node from the unrolling loop latch. | |||
245 | NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch); | |||
246 | ||||
247 | // Update the existing PHI node operand with the value from the new PHI | |||
248 | // node. Corresponding instruction in epilog loop should be PHI. | |||
249 | PHINode *VPN = cast<PHINode>(VMap[&PN]); | |||
250 | VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN); | |||
251 | } | |||
252 | } | |||
253 | ||||
254 | Instruction *InsertPt = NewExit->getTerminator(); | |||
255 | IRBuilder<> B(InsertPt); | |||
256 | Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); | |||
257 | assert(Exit && "Loop must have a single exit block only")(static_cast <bool> (Exit && "Loop must have a single exit block only" ) ? void (0) : __assert_fail ("Exit && \"Loop must have a single exit block only\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 257, __extension__ __PRETTY_FUNCTION__)); | |||
258 | // Split the epilogue exit to maintain loop canonicalization guarantees | |||
259 | SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); | |||
260 | SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, | |||
261 | PreserveLCSSA); | |||
262 | // Add the branch to the exit block (around the unrolling loop) | |||
263 | B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); | |||
264 | InsertPt->eraseFromParent(); | |||
265 | if (DT) | |||
266 | DT->changeImmediateDominator(Exit, NewExit); | |||
267 | ||||
268 | // Split the main loop exit to maintain canonicalization guarantees. | |||
269 | SmallVector<BasicBlock*, 4> NewExitPreds{Latch}; | |||
270 | SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, | |||
271 | PreserveLCSSA); | |||
272 | } | |||
273 | ||||
274 | /// Create a clone of the blocks in a loop and connect them together. | |||
275 | /// If CreateRemainderLoop is false, loop structure will not be cloned, | |||
276 | /// otherwise a new loop will be created including all cloned blocks, and the | |||
277 | /// iterator of it switches to count NewIter down to 0. | |||
278 | /// The cloned blocks should be inserted between InsertTop and InsertBot. | |||
279 | /// If loop structure is cloned InsertTop should be new preheader, InsertBot | |||
280 | /// new loop exit. | |||
281 | /// Return the new cloned loop that is created when CreateRemainderLoop is true. | |||
282 | static Loop * | |||
283 | CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, | |||
284 | const bool UseEpilogRemainder, const bool UnrollRemainder, | |||
285 | BasicBlock *InsertTop, | |||
286 | BasicBlock *InsertBot, BasicBlock *Preheader, | |||
287 | std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, | |||
288 | ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { | |||
289 | StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; | |||
290 | BasicBlock *Header = L->getHeader(); | |||
291 | BasicBlock *Latch = L->getLoopLatch(); | |||
292 | Function *F = Header->getParent(); | |||
293 | LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); | |||
294 | LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); | |||
295 | Loop *ParentLoop = L->getParentLoop(); | |||
296 | NewLoopsMap NewLoops; | |||
297 | NewLoops[ParentLoop] = ParentLoop; | |||
298 | if (!CreateRemainderLoop) | |||
299 | NewLoops[L] = ParentLoop; | |||
300 | ||||
301 | // For each block in the original loop, create a new copy, | |||
302 | // and update the value map with the newly created values. | |||
303 | for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { | |||
304 | BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); | |||
305 | NewBlocks.push_back(NewBB); | |||
306 | ||||
307 | // If we're unrolling the outermost loop, there's no remainder loop, | |||
308 | // and this block isn't in a nested loop, then the new block is not | |||
309 | // in any loop. Otherwise, add it to loopinfo. | |||
310 | if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) | |||
311 | addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); | |||
312 | ||||
313 | VMap[*BB] = NewBB; | |||
314 | if (Header == *BB) { | |||
315 | // For the first block, add a CFG connection to this newly | |||
316 | // created block. | |||
317 | InsertTop->getTerminator()->setSuccessor(0, NewBB); | |||
318 | } | |||
319 | ||||
320 | if (DT) { | |||
321 | if (Header == *BB) { | |||
322 | // The header is dominated by the preheader. | |||
323 | DT->addNewBlock(NewBB, InsertTop); | |||
324 | } else { | |||
325 | // Copy information from original loop to unrolled loop. | |||
326 | BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); | |||
327 | DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); | |||
328 | } | |||
329 | } | |||
330 | ||||
331 | if (Latch == *BB) { | |||
332 | // For the last block, if CreateRemainderLoop is false, create a direct | |||
333 | // jump to InsertBot. If not, create a loop back to cloned head. | |||
334 | VMap.erase((*BB)->getTerminator()); | |||
335 | BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); | |||
336 | BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); | |||
337 | IRBuilder<> Builder(LatchBR); | |||
338 | if (!CreateRemainderLoop) { | |||
339 | Builder.CreateBr(InsertBot); | |||
340 | } else { | |||
341 | PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, | |||
342 | suffix + ".iter", | |||
343 | FirstLoopBB->getFirstNonPHI()); | |||
344 | Value *IdxSub = | |||
345 | Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), | |||
346 | NewIdx->getName() + ".sub"); | |||
347 | Value *IdxCmp = | |||
348 | Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); | |||
349 | Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); | |||
350 | NewIdx->addIncoming(NewIter, InsertTop); | |||
351 | NewIdx->addIncoming(IdxSub, NewBB); | |||
352 | } | |||
353 | LatchBR->eraseFromParent(); | |||
354 | } | |||
355 | } | |||
356 | ||||
357 | // Change the incoming values to the ones defined in the preheader or | |||
358 | // cloned loop. | |||
359 | for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { | |||
360 | PHINode *NewPHI = cast<PHINode>(VMap[&*I]); | |||
361 | if (!CreateRemainderLoop) { | |||
362 | if (UseEpilogRemainder) { | |||
363 | unsigned idx = NewPHI->getBasicBlockIndex(Preheader); | |||
364 | NewPHI->setIncomingBlock(idx, InsertTop); | |||
365 | NewPHI->removeIncomingValue(Latch, false); | |||
366 | } else { | |||
367 | VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); | |||
368 | cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); | |||
369 | } | |||
370 | } else { | |||
371 | unsigned idx = NewPHI->getBasicBlockIndex(Preheader); | |||
372 | NewPHI->setIncomingBlock(idx, InsertTop); | |||
373 | BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); | |||
374 | idx = NewPHI->getBasicBlockIndex(Latch); | |||
375 | Value *InVal = NewPHI->getIncomingValue(idx); | |||
376 | NewPHI->setIncomingBlock(idx, NewLatch); | |||
377 | if (Value *V = VMap.lookup(InVal)) | |||
378 | NewPHI->setIncomingValue(idx, V); | |||
379 | } | |||
380 | } | |||
381 | if (CreateRemainderLoop) { | |||
382 | Loop *NewLoop = NewLoops[L]; | |||
383 | assert(NewLoop && "L should have been cloned")(static_cast <bool> (NewLoop && "L should have been cloned" ) ? void (0) : __assert_fail ("NewLoop && \"L should have been cloned\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 383, __extension__ __PRETTY_FUNCTION__)); | |||
384 | ||||
385 | // Only add loop metadata if the loop is not going to be completely | |||
386 | // unrolled. | |||
387 | if (UnrollRemainder) | |||
388 | return NewLoop; | |||
389 | ||||
390 | // Add unroll disable metadata to disable future unrolling for this loop. | |||
391 | NewLoop->setLoopAlreadyUnrolled(); | |||
392 | return NewLoop; | |||
393 | } | |||
394 | else | |||
395 | return nullptr; | |||
396 | } | |||
397 | ||||
398 | /// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits | |||
399 | /// is populated with all the loop exit blocks other than the LatchExit block. | |||
400 | static bool | |||
401 | canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, | |||
402 | BasicBlock *LatchExit, bool PreserveLCSSA, | |||
403 | bool UseEpilogRemainder) { | |||
404 | ||||
405 | // We currently have some correctness constrains in unrolling a multi-exit | |||
406 | // loop. Check for these below. | |||
407 | ||||
408 | // We rely on LCSSA form being preserved when the exit blocks are transformed. | |||
409 | if (!PreserveLCSSA) | |||
410 | return false; | |||
411 | SmallVector<BasicBlock *, 4> Exits; | |||
412 | L->getUniqueExitBlocks(Exits); | |||
413 | for (auto *BB : Exits) | |||
414 | if (BB != LatchExit) | |||
415 | OtherExits.push_back(BB); | |||
416 | ||||
417 | // TODO: Support multiple exiting blocks jumping to the `LatchExit` when | |||
418 | // UnrollRuntimeMultiExit is true. This will need updating the logic in | |||
419 | // connectEpilog/connectProlog. | |||
420 | if (!LatchExit->getSinglePredecessor()) { | |||
421 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Bailout for multi-exit handling when latch exit has >1 " "predecessor.\n"; } } while (false) | |||
422 | dbgs() << "Bailout for multi-exit handling when latch exit has >1 "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Bailout for multi-exit handling when latch exit has >1 " "predecessor.\n"; } } while (false) | |||
423 | "predecessor.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Bailout for multi-exit handling when latch exit has >1 " "predecessor.\n"; } } while (false); | |||
424 | return false; | |||
425 | } | |||
426 | // FIXME: We bail out of multi-exit unrolling when epilog loop is generated | |||
427 | // and L is an inner loop. This is because in presence of multiple exits, the | |||
428 | // outer loop is incorrect: we do not add the EpilogPreheader and exit to the | |||
429 | // outer loop. This is automatically handled in the prolog case, so we do not | |||
430 | // have that bug in prolog generation. | |||
431 | if (UseEpilogRemainder && L->getParentLoop()) | |||
432 | return false; | |||
433 | ||||
434 | // All constraints have been satisfied. | |||
435 | return true; | |||
436 | } | |||
437 | ||||
438 | /// Returns true if we can profitably unroll the multi-exit loop L. Currently, | |||
439 | /// we return true only if UnrollRuntimeMultiExit is set to true. | |||
440 | static bool canProfitablyUnrollMultiExitLoop( | |||
441 | Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit, | |||
442 | bool PreserveLCSSA, bool UseEpilogRemainder) { | |||
443 | ||||
444 | #if !defined(NDEBUG) | |||
445 | SmallVector<BasicBlock *, 8> OtherExitsDummyCheck; | |||
446 | assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,(static_cast <bool> (canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck , LatchExit, PreserveLCSSA, UseEpilogRemainder) && "Should be safe to unroll before checking profitability!" ) ? void (0) : __assert_fail ("canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit, PreserveLCSSA, UseEpilogRemainder) && \"Should be safe to unroll before checking profitability!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 448, __extension__ __PRETTY_FUNCTION__)) | |||
447 | PreserveLCSSA, UseEpilogRemainder) &&(static_cast <bool> (canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck , LatchExit, PreserveLCSSA, UseEpilogRemainder) && "Should be safe to unroll before checking profitability!" ) ? void (0) : __assert_fail ("canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit, PreserveLCSSA, UseEpilogRemainder) && \"Should be safe to unroll before checking profitability!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 448, __extension__ __PRETTY_FUNCTION__)) | |||
448 | "Should be safe to unroll before checking profitability!")(static_cast <bool> (canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck , LatchExit, PreserveLCSSA, UseEpilogRemainder) && "Should be safe to unroll before checking profitability!" ) ? void (0) : __assert_fail ("canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit, PreserveLCSSA, UseEpilogRemainder) && \"Should be safe to unroll before checking profitability!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 448, __extension__ __PRETTY_FUNCTION__)); | |||
449 | #endif | |||
450 | ||||
451 | // Priority goes to UnrollRuntimeMultiExit if it's supplied. | |||
452 | if (UnrollRuntimeMultiExit.getNumOccurrences()) | |||
453 | return UnrollRuntimeMultiExit; | |||
454 | ||||
455 | // The main pain point with multi-exit loop unrolling is that once unrolled, | |||
456 | // we will not be able to merge all blocks into a straight line code. | |||
457 | // There are branches within the unrolled loop that go to the OtherExits. | |||
458 | // The second point is the increase in code size, but this is true | |||
459 | // irrespective of multiple exits. | |||
460 | ||||
461 | // Note: Both the heuristics below are coarse grained. We are essentially | |||
462 | // enabling unrolling of loops that have a single side exit other than the | |||
463 | // normal LatchExit (i.e. exiting into a deoptimize block). | |||
464 | // The heuristics considered are: | |||
465 | // 1. low number of branches in the unrolled version. | |||
466 | // 2. high predictability of these extra branches. | |||
467 | // We avoid unrolling loops that have more than two exiting blocks. This | |||
468 | // limits the total number of branches in the unrolled loop to be atmost | |||
469 | // the unroll factor (since one of the exiting blocks is the latch block). | |||
470 | SmallVector<BasicBlock*, 4> ExitingBlocks; | |||
471 | L->getExitingBlocks(ExitingBlocks); | |||
472 | if (ExitingBlocks.size() > 2) | |||
473 | return false; | |||
474 | ||||
475 | // The second heuristic is that L has one exit other than the latchexit and | |||
476 | // that exit is a deoptimize block. We know that deoptimize blocks are rarely | |||
477 | // taken, which also implies the branch leading to the deoptimize block is | |||
478 | // highly predictable. | |||
479 | return (OtherExits.size() == 1 && | |||
480 | OtherExits[0]->getTerminatingDeoptimizeCall()); | |||
481 | // TODO: These can be fine-tuned further to consider code size or deopt states | |||
482 | // that are captured by the deoptimize exit block. | |||
483 | // Also, we can extend this to support more cases, if we actually | |||
484 | // know of kinds of multiexit loops that would benefit from unrolling. | |||
485 | } | |||
486 | ||||
487 | /// Insert code in the prolog/epilog code when unrolling a loop with a | |||
488 | /// run-time trip-count. | |||
489 | /// | |||
490 | /// This method assumes that the loop unroll factor is total number | |||
491 | /// of loop bodies in the loop after unrolling. (Some folks refer | |||
492 | /// to the unroll factor as the number of *extra* copies added). | |||
493 | /// We assume also that the loop unroll factor is a power-of-two. So, after | |||
494 | /// unrolling the loop, the number of loop bodies executed is 2, | |||
495 | /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch | |||
496 | /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for | |||
497 | /// the switch instruction is generated. | |||
498 | /// | |||
499 | /// ***Prolog case*** | |||
500 | /// extraiters = tripcount % loopfactor | |||
501 | /// if (extraiters == 0) jump Loop: | |||
502 | /// else jump Prol: | |||
503 | /// Prol: LoopBody; | |||
504 | /// extraiters -= 1 // Omitted if unroll factor is 2. | |||
505 | /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2. | |||
506 | /// if (tripcount < loopfactor) jump End: | |||
507 | /// Loop: | |||
508 | /// ... | |||
509 | /// End: | |||
510 | /// | |||
511 | /// ***Epilog case*** | |||
512 | /// extraiters = tripcount % loopfactor | |||
513 | /// if (tripcount < loopfactor) jump LoopExit: | |||
514 | /// unroll_iters = tripcount - extraiters | |||
515 | /// Loop: LoopBody; (executes unroll_iter times); | |||
516 | /// unroll_iter -= 1 | |||
517 | /// if (unroll_iter != 0) jump Loop: | |||
518 | /// LoopExit: | |||
519 | /// if (extraiters == 0) jump EpilExit: | |||
520 | /// Epil: LoopBody; (executes extraiters times) | |||
521 | /// extraiters -= 1 // Omitted if unroll factor is 2. | |||
522 | /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2. | |||
523 | /// EpilExit: | |||
524 | ||||
525 | bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, | |||
526 | bool AllowExpensiveTripCount, | |||
527 | bool UseEpilogRemainder, | |||
528 | bool UnrollRemainder, | |||
529 | LoopInfo *LI, ScalarEvolution *SE, | |||
530 | DominatorTree *DT, AssumptionCache *AC, | |||
531 | bool PreserveLCSSA) { | |||
532 | LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Trying runtime unrolling on Loop: \n" ; } } while (false); | |||
533 | LLVM_DEBUG(L->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { L->dump(); } } while (false); | |||
534 | LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" : dbgs() << "Using prolog remainder.\n"; } } while (false ) | |||
535 | : dbgs() << "Using prolog remainder.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" : dbgs() << "Using prolog remainder.\n"; } } while (false ); | |||
536 | ||||
537 | // Make sure the loop is in canonical form. | |||
538 | if (!L->isLoopSimplifyForm()) { | |||
| ||||
539 | LLVM_DEBUG(dbgs() << "Not in simplify form!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Not in simplify form!\n"; } } while (false); | |||
540 | return false; | |||
541 | } | |||
542 | ||||
543 | // Guaranteed by LoopSimplifyForm. | |||
544 | BasicBlock *Latch = L->getLoopLatch(); | |||
545 | BasicBlock *Header = L->getHeader(); | |||
546 | ||||
547 | BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); | |||
548 | unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; | |||
549 | BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); | |||
550 | // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the | |||
551 | // targets of the Latch be an exit block out of the loop. This needs | |||
552 | // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. | |||
553 | assert(!L->contains(LatchExit) &&(static_cast <bool> (!L->contains(LatchExit) && "one of the loop latch successors should be the exit block!" ) ? void (0) : __assert_fail ("!L->contains(LatchExit) && \"one of the loop latch successors should be the exit block!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 554, __extension__ __PRETTY_FUNCTION__)) | |||
554 | "one of the loop latch successors should be the exit block!")(static_cast <bool> (!L->contains(LatchExit) && "one of the loop latch successors should be the exit block!" ) ? void (0) : __assert_fail ("!L->contains(LatchExit) && \"one of the loop latch successors should be the exit block!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 554, __extension__ __PRETTY_FUNCTION__)); | |||
555 | // These are exit blocks other than the target of the latch exiting block. | |||
556 | SmallVector<BasicBlock *, 4> OtherExits; | |||
557 | bool isMultiExitUnrollingEnabled = | |||
558 | canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, | |||
559 | UseEpilogRemainder) && | |||
560 | canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, | |||
561 | UseEpilogRemainder); | |||
562 | // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. | |||
563 | if (!isMultiExitUnrollingEnabled && | |||
564 | (!L->getExitingBlock() || OtherExits.size())) { | |||
565 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " "enabled!\n"; } } while (false) | |||
566 | dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " "enabled!\n"; } } while (false) | |||
567 | << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " "enabled!\n"; } } while (false) | |||
568 | "enabled!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " "enabled!\n"; } } while (false); | |||
569 | return false; | |||
570 | } | |||
571 | // Use Scalar Evolution to compute the trip count. This allows more loops to | |||
572 | // be unrolled than relying on induction var simplification. | |||
573 | if (!SE) | |||
574 | return false; | |||
575 | ||||
576 | // Only unroll loops with a computable trip count, and the trip count needs | |||
577 | // to be an int value (allowing a pointer type is a TODO item). | |||
578 | // We calculate the backedge count by using getExitCount on the Latch block, | |||
579 | // which is proven to be the only exiting block in this loop. This is same as | |||
580 | // calculating getBackedgeTakenCount on the loop (which computes SCEV for all | |||
581 | // exiting blocks). | |||
582 | const SCEV *BECountSC = SE->getExitCount(L, Latch); | |||
583 | if (isa<SCEVCouldNotCompute>(BECountSC) || | |||
584 | !BECountSC->getType()->isIntegerTy()) { | |||
585 | LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Could not compute exit block SCEV\n" ; } } while (false); | |||
586 | return false; | |||
587 | } | |||
588 | ||||
589 | unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); | |||
590 | ||||
591 | // Add 1 since the backedge count doesn't include the first loop iteration. | |||
592 | const SCEV *TripCountSC = | |||
593 | SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); | |||
594 | if (isa<SCEVCouldNotCompute>(TripCountSC)) { | |||
595 | LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Could not compute trip count SCEV.\n" ; } } while (false); | |||
596 | return false; | |||
597 | } | |||
598 | ||||
599 | BasicBlock *PreHeader = L->getLoopPreheader(); | |||
600 | BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); | |||
601 | const DataLayout &DL = Header->getModule()->getDataLayout(); | |||
| ||||
602 | SCEVExpander Expander(*SE, DL, "loop-unroll"); | |||
603 | if (!AllowExpensiveTripCount && | |||
604 | Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { | |||
605 | LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "High cost for expanding trip count scev!\n" ; } } while (false); | |||
606 | return false; | |||
607 | } | |||
608 | ||||
609 | // This constraint lets us deal with an overflowing trip count easily; see the | |||
610 | // comment on ModVal below. | |||
611 | if (Log2_32(Count) > BEWidth) { | |||
612 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Count failed constraint on overflow trip count calculation.\n" ; } } while (false) | |||
613 | dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Count failed constraint on overflow trip count calculation.\n" ; } } while (false) | |||
614 | << "Count failed constraint on overflow trip count calculation.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Count failed constraint on overflow trip count calculation.\n" ; } } while (false); | |||
615 | return false; | |||
616 | } | |||
617 | ||||
618 | // Loop structure is the following: | |||
619 | // | |||
620 | // PreHeader | |||
621 | // Header | |||
622 | // ... | |||
623 | // Latch | |||
624 | // LatchExit | |||
625 | ||||
626 | BasicBlock *NewPreHeader; | |||
627 | BasicBlock *NewExit = nullptr; | |||
628 | BasicBlock *PrologExit = nullptr; | |||
629 | BasicBlock *EpilogPreHeader = nullptr; | |||
630 | BasicBlock *PrologPreHeader = nullptr; | |||
631 | ||||
632 | if (UseEpilogRemainder) { | |||
633 | // If epilog remainder | |||
634 | // Split PreHeader to insert a branch around loop for unrolling. | |||
635 | NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); | |||
636 | NewPreHeader->setName(PreHeader->getName() + ".new"); | |||
637 | // Split LatchExit to create phi nodes from branch above. | |||
638 | SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); | |||
639 | NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", | |||
640 | DT, LI, PreserveLCSSA); | |||
641 | // NewExit gets its DebugLoc from LatchExit, which is not part of the | |||
642 | // original Loop. | |||
643 | // Fix this by setting Loop's DebugLoc to NewExit. | |||
644 | auto *NewExitTerminator = NewExit->getTerminator(); | |||
645 | NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); | |||
646 | // Split NewExit to insert epilog remainder loop. | |||
647 | EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); | |||
648 | EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); | |||
649 | } else { | |||
650 | // If prolog remainder | |||
651 | // Split the original preheader twice to insert prolog remainder loop | |||
652 | PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); | |||
653 | PrologPreHeader->setName(Header->getName() + ".prol.preheader"); | |||
654 | PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), | |||
655 | DT, LI); | |||
656 | PrologExit->setName(Header->getName() + ".prol.loopexit"); | |||
657 | // Split PrologExit to get NewPreHeader. | |||
658 | NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); | |||
659 | NewPreHeader->setName(PreHeader->getName() + ".new"); | |||
660 | } | |||
661 | // Loop structure should be the following: | |||
662 | // Epilog Prolog | |||
663 | // | |||
664 | // PreHeader PreHeader | |||
665 | // *NewPreHeader *PrologPreHeader | |||
666 | // Header *PrologExit | |||
667 | // ... *NewPreHeader | |||
668 | // Latch Header | |||
669 | // *NewExit ... | |||
670 | // *EpilogPreHeader Latch | |||
671 | // LatchExit LatchExit | |||
672 | ||||
673 | // Calculate conditions for branch around loop for unrolling | |||
674 | // in epilog case and around prolog remainder loop in prolog case. | |||
675 | // Compute the number of extra iterations required, which is: | |||
676 | // extra iterations = run-time trip count % loop unroll factor | |||
677 | PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); | |||
678 | Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), | |||
679 | PreHeaderBR); | |||
680 | Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), | |||
681 | PreHeaderBR); | |||
682 | IRBuilder<> B(PreHeaderBR); | |||
683 | Value *ModVal; | |||
684 | // Calculate ModVal = (BECount + 1) % Count. | |||
685 | // Note that TripCount is BECount + 1. | |||
686 | if (isPowerOf2_32(Count)) { | |||
687 | // When Count is power of 2 we don't BECount for epilog case, however we'll | |||
688 | // need it for a branch around unrolling loop for prolog case. | |||
689 | ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); | |||
690 | // 1. There are no iterations to be run in the prolog/epilog loop. | |||
691 | // OR | |||
692 | // 2. The addition computing TripCount overflowed. | |||
693 | // | |||
694 | // If (2) is true, we know that TripCount really is (1 << BEWidth) and so | |||
695 | // the number of iterations that remain to be run in the original loop is a | |||
696 | // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we | |||
697 | // explicitly check this above). | |||
698 | } else { | |||
699 | // As (BECount + 1) can potentially unsigned overflow we count | |||
700 | // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. | |||
701 | Value *ModValTmp = B.CreateURem(BECount, | |||
702 | ConstantInt::get(BECount->getType(), | |||
703 | Count)); | |||
704 | Value *ModValAdd = B.CreateAdd(ModValTmp, | |||
705 | ConstantInt::get(ModValTmp->getType(), 1)); | |||
706 | // At that point (BECount % Count) + 1 could be equal to Count. | |||
707 | // To handle this case we need to take mod by Count one more time. | |||
708 | ModVal = B.CreateURem(ModValAdd, | |||
709 | ConstantInt::get(BECount->getType(), Count), | |||
710 | "xtraiter"); | |||
711 | } | |||
712 | Value *BranchVal = | |||
713 | UseEpilogRemainder ? B.CreateICmpULT(BECount, | |||
714 | ConstantInt::get(BECount->getType(), | |||
715 | Count - 1)) : | |||
716 | B.CreateIsNotNull(ModVal, "lcmp.mod"); | |||
717 | BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; | |||
718 | BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; | |||
719 | // Branch to either remainder (extra iterations) loop or unrolling loop. | |||
720 | B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); | |||
721 | PreHeaderBR->eraseFromParent(); | |||
722 | if (DT) { | |||
723 | if (UseEpilogRemainder) | |||
724 | DT->changeImmediateDominator(NewExit, PreHeader); | |||
725 | else | |||
726 | DT->changeImmediateDominator(PrologExit, PreHeader); | |||
727 | } | |||
728 | Function *F = Header->getParent(); | |||
729 | // Get an ordered list of blocks in the loop to help with the ordering of the | |||
730 | // cloned blocks in the prolog/epilog code | |||
731 | LoopBlocksDFS LoopBlocks(L); | |||
732 | LoopBlocks.perform(LI); | |||
733 | ||||
734 | // | |||
735 | // For each extra loop iteration, create a copy of the loop's basic blocks | |||
736 | // and generate a condition that branches to the copy depending on the | |||
737 | // number of 'left over' iterations. | |||
738 | // | |||
739 | std::vector<BasicBlock *> NewBlocks; | |||
740 | ValueToValueMapTy VMap; | |||
741 | ||||
742 | // For unroll factor 2 remainder loop will have 1 iterations. | |||
743 | // Do not create 1 iteration loop. | |||
744 | bool CreateRemainderLoop = (Count != 2); | |||
745 | ||||
746 | // Clone all the basic blocks in the loop. If Count is 2, we don't clone | |||
747 | // the loop, otherwise we create a cloned loop to execute the extra | |||
748 | // iterations. This function adds the appropriate CFG connections. | |||
749 | BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; | |||
750 | BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; | |||
751 | Loop *remainderLoop = CloneLoopBlocks( | |||
752 | L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder, | |||
753 | InsertTop, InsertBot, | |||
754 | NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); | |||
755 | ||||
756 | // Insert the cloned blocks into the function. | |||
757 | F->getBasicBlockList().splice(InsertBot->getIterator(), | |||
758 | F->getBasicBlockList(), | |||
759 | NewBlocks[0]->getIterator(), | |||
760 | F->end()); | |||
761 | ||||
762 | // Now the loop blocks are cloned and the other exiting blocks from the | |||
763 | // remainder are connected to the original Loop's exit blocks. The remaining | |||
764 | // work is to update the phi nodes in the original loop, and take in the | |||
765 | // values from the cloned region. Also update the dominator info for | |||
766 | // OtherExits and their immediate successors, since we have new edges into | |||
767 | // OtherExits. | |||
768 | SmallPtrSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks; | |||
769 | for (auto *BB : OtherExits) { | |||
770 | for (auto &II : *BB) { | |||
771 | ||||
772 | // Given we preserve LCSSA form, we know that the values used outside the | |||
773 | // loop will be used through these phi nodes at the exit blocks that are | |||
774 | // transformed below. | |||
775 | if (!isa<PHINode>(II)) | |||
776 | break; | |||
777 | PHINode *Phi = cast<PHINode>(&II); | |||
778 | unsigned oldNumOperands = Phi->getNumIncomingValues(); | |||
779 | // Add the incoming values from the remainder code to the end of the phi | |||
780 | // node. | |||
781 | for (unsigned i =0; i < oldNumOperands; i++){ | |||
782 | Value *newVal = VMap.lookup(Phi->getIncomingValue(i)); | |||
783 | // newVal can be a constant or derived from values outside the loop, and | |||
784 | // hence need not have a VMap value. Also, since lookup already generated | |||
785 | // a default "null" VMap entry for this value, we need to populate that | |||
786 | // VMap entry correctly, with the mapped entry being itself. | |||
787 | if (!newVal) { | |||
788 | newVal = Phi->getIncomingValue(i); | |||
789 | VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i); | |||
790 | } | |||
791 | Phi->addIncoming(newVal, | |||
792 | cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); | |||
793 | } | |||
794 | } | |||
795 | #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) | |||
796 | for (BasicBlock *SuccBB : successors(BB)) { | |||
797 | assert(!(any_of(OtherExits,(static_cast <bool> (!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && "Breaks the definition of dedicated exits!") ? void (0) : __assert_fail ("!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && \"Breaks the definition of dedicated exits!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 800, __extension__ __PRETTY_FUNCTION__)) | |||
798 | [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||(static_cast <bool> (!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && "Breaks the definition of dedicated exits!") ? void (0) : __assert_fail ("!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && \"Breaks the definition of dedicated exits!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 800, __extension__ __PRETTY_FUNCTION__)) | |||
799 | SuccBB == LatchExit) &&(static_cast <bool> (!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && "Breaks the definition of dedicated exits!") ? void (0) : __assert_fail ("!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && \"Breaks the definition of dedicated exits!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 800, __extension__ __PRETTY_FUNCTION__)) | |||
800 | "Breaks the definition of dedicated exits!")(static_cast <bool> (!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && "Breaks the definition of dedicated exits!") ? void (0) : __assert_fail ("!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && \"Breaks the definition of dedicated exits!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 800, __extension__ __PRETTY_FUNCTION__)); | |||
801 | } | |||
802 | #endif | |||
803 | // Update the dominator info because the immediate dominator is no longer the | |||
804 | // header of the original Loop. BB has edges both from L and remainder code. | |||
805 | // Since the preheader determines which loop is run (L or directly jump to | |||
806 | // the remainder code), we set the immediate dominator as the preheader. | |||
807 | if (DT) { | |||
808 | DT->changeImmediateDominator(BB, PreHeader); | |||
809 | // Also update the IDom for immediate successors of BB. If the current | |||
810 | // IDom is the header, update the IDom to be the preheader because that is | |||
811 | // the nearest common dominator of all predecessors of SuccBB. We need to | |||
812 | // check for IDom being the header because successors of exit blocks can | |||
813 | // have edges from outside the loop, and we should not incorrectly update | |||
814 | // the IDom in that case. | |||
815 | for (BasicBlock *SuccBB: successors(BB)) | |||
816 | if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) { | |||
817 | if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) { | |||
818 | assert(!SuccBB->getSinglePredecessor() &&(static_cast <bool> (!SuccBB->getSinglePredecessor() && "BB should be the IDom then!") ? void (0) : __assert_fail ("!SuccBB->getSinglePredecessor() && \"BB should be the IDom then!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 819, __extension__ __PRETTY_FUNCTION__)) | |||
819 | "BB should be the IDom then!")(static_cast <bool> (!SuccBB->getSinglePredecessor() && "BB should be the IDom then!") ? void (0) : __assert_fail ("!SuccBB->getSinglePredecessor() && \"BB should be the IDom then!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/Utils/LoopUnrollRuntime.cpp" , 819, __extension__ __PRETTY_FUNCTION__)); | |||
820 | DT->changeImmediateDominator(SuccBB, PreHeader); | |||
821 | } | |||
822 | } | |||
823 | } | |||
824 | } | |||
825 | ||||
826 | // Loop structure should be the following: | |||
827 | // Epilog Prolog | |||
828 | // | |||
829 | // PreHeader PreHeader | |||
830 | // NewPreHeader PrologPreHeader | |||
831 | // Header PrologHeader | |||
832 | // ... ... | |||
833 | // Latch PrologLatch | |||
834 | // NewExit PrologExit | |||
835 | // EpilogPreHeader NewPreHeader | |||
836 | // EpilogHeader Header | |||
837 | // ... ... | |||
838 | // EpilogLatch Latch | |||
839 | // LatchExit LatchExit | |||
840 | ||||
841 | // Rewrite the cloned instruction operands to use the values created when the | |||
842 | // clone is created. | |||
843 | for (BasicBlock *BB : NewBlocks) { | |||
844 | for (Instruction &I : *BB) { | |||
845 | RemapInstruction(&I, VMap, | |||
846 | RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); | |||
847 | } | |||
848 | } | |||
849 | ||||
850 | if (UseEpilogRemainder) { | |||
851 | // Connect the epilog code to the original loop and update the | |||
852 | // PHI functions. | |||
853 | ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, | |||
854 | EpilogPreHeader, NewPreHeader, VMap, DT, LI, | |||
855 | PreserveLCSSA); | |||
856 | ||||
857 | // Update counter in loop for unrolling. | |||
858 | // I should be multiply of Count. | |||
859 | IRBuilder<> B2(NewPreHeader->getTerminator()); | |||
860 | Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); | |||
861 | BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); | |||
862 | B2.SetInsertPoint(LatchBR); | |||
863 | PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", | |||
864 | Header->getFirstNonPHI()); | |||
865 | Value *IdxSub = | |||
866 | B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), | |||
867 | NewIdx->getName() + ".nsub"); | |||
868 | Value *IdxCmp; | |||
869 | if (LatchBR->getSuccessor(0) == Header) | |||
870 | IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp"); | |||
871 | else | |||
872 | IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); | |||
873 | NewIdx->addIncoming(TestVal, NewPreHeader); | |||
874 | NewIdx->addIncoming(IdxSub, Latch); | |||
875 | LatchBR->setCondition(IdxCmp); | |||
876 | } else { | |||
877 | // Connect the prolog code to the original loop and update the | |||
878 | // PHI functions. | |||
879 | ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, | |||
880 | NewPreHeader, VMap, DT, LI, PreserveLCSSA); | |||
881 | } | |||
882 | ||||
883 | // If this loop is nested, then the loop unroller changes the code in the any | |||
884 | // of its parent loops, so the Scalar Evolution pass needs to be run again. | |||
885 | SE->forgetTopmostLoop(L); | |||
886 | ||||
887 | // Canonicalize to LoopSimplifyForm both original and remainder loops. We | |||
888 | // cannot rely on the LoopUnrollPass to do this because it only does | |||
889 | // canonicalization for parent/subloops and not the sibling loops. | |||
890 | if (OtherExits.size() > 0) { | |||
891 | // Generate dedicated exit blocks for the original loop, to preserve | |||
892 | // LoopSimplifyForm. | |||
893 | formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); | |||
894 | // Generate dedicated exit blocks for the remainder loop if one exists, to | |||
895 | // preserve LoopSimplifyForm. | |||
896 | if (remainderLoop) | |||
897 | formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); | |||
898 | } | |||
899 | ||||
900 | if (remainderLoop && UnrollRemainder) { | |||
901 | LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Unrolling remainder loop\n" ; } } while (false); | |||
902 | UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, | |||
903 | /*Force*/ false, /*AllowRuntime*/ false, | |||
904 | /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, | |||
905 | /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, | |||
906 | /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, | |||
907 | /*ORE*/ nullptr, PreserveLCSSA); | |||
908 | } | |||
909 | ||||
910 | NumRuntimeUnrolled++; | |||
911 | return true; | |||
912 | } |