LLVM 17.0.0git
LoopUnrollRuntime.cpp
Go to the documentation of this file.
1//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements some loop unrolling utilities for loops with run-time
10// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
11// trip counts.
12//
13// The functions in this file are used to generate extra code when the
14// run-time trip count modulo the unroll factor is not 0. When this is the
15// case, we need to generate code to execute these 'left over' iterations.
16//
17// The current strategy generates an if-then-else sequence prior to the
18// unrolled loop to execute the 'left over' iterations before or after the
19// unrolled loop.
20//
21//===----------------------------------------------------------------------===//
22
23#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/MDBuilder.h"
32#include "llvm/IR/Module.h"
35#include "llvm/Support/Debug.h"
43#include <algorithm>
44
45using namespace llvm;
46
47#define DEBUG_TYPE "loop-unroll"
48
49STATISTIC(NumRuntimeUnrolled,
50 "Number of loops unrolled with run-time trip counts");
52 "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
53 cl::desc("Allow runtime unrolling for loops with multiple exits, when "
54 "epilog is generated"));
56 "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
57 cl::desc("Assume the non latch exit block to be predictable"));
58
59/// Connect the unrolling prolog code to the original loop.
60/// The unrolling prolog code contains code to execute the
61/// 'extra' iterations if the run-time trip count modulo the
62/// unroll count is non-zero.
63///
64/// This function performs the following:
65/// - Create PHI nodes at prolog end block to combine values
66/// that exit the prolog code and jump around the prolog.
67/// - Add a PHI operand to a PHI node at the loop exit block
68/// for values that exit the prolog and go around the loop.
69/// - Branch around the original loop if the trip count is less
70/// than the unroll factor.
71///
72static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
73 BasicBlock *PrologExit,
74 BasicBlock *OriginalLoopLatchExit,
75 BasicBlock *PreHeader, BasicBlock *NewPreHeader,
77 LoopInfo *LI, bool PreserveLCSSA,
78 ScalarEvolution &SE) {
79 // Loop structure should be the following:
80 // Preheader
81 // PrologHeader
82 // ...
83 // PrologLatch
84 // PrologExit
85 // NewPreheader
86 // Header
87 // ...
88 // Latch
89 // LatchExit
90 BasicBlock *Latch = L->getLoopLatch();
91 assert(Latch && "Loop must have a latch");
92 BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
93
94 // Create a PHI node for each outgoing value from the original loop
95 // (which means it is an outgoing value from the prolog code too).
96 // The new PHI node is inserted in the prolog end basic block.
97 // The new PHI node value is added as an operand of a PHI node in either
98 // the loop header or the loop exit block.
99 for (BasicBlock *Succ : successors(Latch)) {
100 for (PHINode &PN : Succ->phis()) {
101 // Add a new PHI node to the prolog end block and add the
102 // appropriate incoming values.
103 // TODO: This code assumes that the PrologExit (or the LatchExit block for
104 // prolog loop) contains only one predecessor from the loop, i.e. the
105 // PrologLatch. When supporting multiple-exiting block loops, we can have
106 // two or more blocks that have the LatchExit as the target in the
107 // original loop.
108 PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
109 PrologExit->getFirstNonPHI());
110 // Adding a value to the new PHI node from the original loop preheader.
111 // This is the value that skips all the prolog code.
112 if (L->contains(&PN)) {
113 // Succ is loop header.
114 NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
115 PreHeader);
116 } else {
117 // Succ is LatchExit.
118 NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
119 }
120
121 Value *V = PN.getIncomingValueForBlock(Latch);
122 if (Instruction *I = dyn_cast<Instruction>(V)) {
123 if (L->contains(I)) {
124 V = VMap.lookup(I);
125 }
126 }
127 // Adding a value to the new PHI node from the last prolog block
128 // that was created.
129 NewPN->addIncoming(V, PrologLatch);
130
131 // Update the existing PHI node operand with the value from the
132 // new PHI node. How this is done depends on if the existing
133 // PHI node is in the original loop block, or the exit block.
134 if (L->contains(&PN))
135 PN.setIncomingValueForBlock(NewPreHeader, NewPN);
136 else
137 PN.addIncoming(NewPN, PrologExit);
138 SE.forgetValue(&PN);
139 }
140 }
141
142 // Make sure that created prolog loop is in simplified form
143 SmallVector<BasicBlock *, 4> PrologExitPreds;
144 Loop *PrologLoop = LI->getLoopFor(PrologLatch);
145 if (PrologLoop) {
146 for (BasicBlock *PredBB : predecessors(PrologExit))
147 if (PrologLoop->contains(PredBB))
148 PrologExitPreds.push_back(PredBB);
149
150 SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
151 nullptr, PreserveLCSSA);
152 }
153
154 // Create a branch around the original loop, which is taken if there are no
155 // iterations remaining to be executed after running the prologue.
156 Instruction *InsertPt = PrologExit->getTerminator();
157 IRBuilder<> B(InsertPt);
158
159 assert(Count != 0 && "nonsensical Count!");
160
161 // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
162 // This means %xtraiter is (BECount + 1) and all of the iterations of this
163 // loop were executed by the prologue. Note that if BECount <u (Count - 1)
164 // then (BECount + 1) cannot unsigned-overflow.
165 Value *BrLoopExit =
166 B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
167 // Split the exit to maintain loop canonicalization guarantees
168 SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
169 SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
170 nullptr, PreserveLCSSA);
171 // Add the branch to the exit block (around the unrolled loop)
172 B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
173 InsertPt->eraseFromParent();
174 if (DT) {
175 auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
176 PrologExit);
177 DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom);
178 }
179}
180
181/// Connect the unrolling epilog code to the original loop.
182/// The unrolling epilog code contains code to execute the
183/// 'extra' iterations if the run-time trip count modulo the
184/// unroll count is non-zero.
185///
186/// This function performs the following:
187/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
188/// - Create PHI nodes at the unrolling loop exit to combine
189/// values that exit the unrolling loop code and jump around it.
190/// - Update PHI operands in the epilog loop by the new PHI nodes
191/// - Branch around the epilog loop if extra iters (ModVal) is zero.
192///
193static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
194 BasicBlock *Exit, BasicBlock *PreHeader,
195 BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
197 LoopInfo *LI, bool PreserveLCSSA,
198 ScalarEvolution &SE) {
199 BasicBlock *Latch = L->getLoopLatch();
200 assert(Latch && "Loop must have a latch");
201 BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
202
203 // Loop structure should be the following:
204 //
205 // PreHeader
206 // NewPreHeader
207 // Header
208 // ...
209 // Latch
210 // NewExit (PN)
211 // EpilogPreHeader
212 // EpilogHeader
213 // ...
214 // EpilogLatch
215 // Exit (EpilogPN)
216
217 // Update PHI nodes at NewExit and Exit.
218 for (PHINode &PN : NewExit->phis()) {
219 // PN should be used in another PHI located in Exit block as
220 // Exit was split by SplitBlockPredecessors into Exit and NewExit
221 // Basically it should look like:
222 // NewExit:
223 // PN = PHI [I, Latch]
224 // ...
225 // Exit:
226 // EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil]
227 //
228 // Exits from non-latch blocks point to the original exit block and the
229 // epilogue edges have already been added.
230 //
231 // There is EpilogPreHeader incoming block instead of NewExit as
232 // NewExit was spilt 1 more time to get EpilogPreHeader.
233 assert(PN.hasOneUse() && "The phi should have 1 use");
234 PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
235 assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
236
237 // Add incoming PreHeader from branch around the Loop
238 PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
239 SE.forgetValue(&PN);
240
241 Value *V = PN.getIncomingValueForBlock(Latch);
242 Instruction *I = dyn_cast<Instruction>(V);
243 if (I && L->contains(I))
244 // If value comes from an instruction in the loop add VMap value.
245 V = VMap.lookup(I);
246 // For the instruction out of the loop, constant or undefined value
247 // insert value itself.
248 EpilogPN->addIncoming(V, EpilogLatch);
249
250 assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
251 "EpilogPN should have EpilogPreHeader incoming block");
252 // Change EpilogPreHeader incoming block to NewExit.
253 EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
254 NewExit);
255 // Now PHIs should look like:
256 // NewExit:
257 // PN = PHI [I, Latch], [undef, PreHeader]
258 // ...
259 // Exit:
260 // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
261 }
262
263 // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
264 // Update corresponding PHI nodes in epilog loop.
265 for (BasicBlock *Succ : successors(Latch)) {
266 // Skip this as we already updated phis in exit blocks.
267 if (!L->contains(Succ))
268 continue;
269 for (PHINode &PN : Succ->phis()) {
270 // Add new PHI nodes to the loop exit block and update epilog
271 // PHIs with the new PHI values.
272 PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
273 NewExit->getFirstNonPHI());
274 // Adding a value to the new PHI node from the unrolling loop preheader.
275 NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
276 // Adding a value to the new PHI node from the unrolling loop latch.
277 NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
278
279 // Update the existing PHI node operand with the value from the new PHI
280 // node. Corresponding instruction in epilog loop should be PHI.
281 PHINode *VPN = cast<PHINode>(VMap[&PN]);
282 VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
283 }
284 }
285
286 Instruction *InsertPt = NewExit->getTerminator();
287 IRBuilder<> B(InsertPt);
288 Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
289 assert(Exit && "Loop must have a single exit block only");
290 // Split the epilogue exit to maintain loop canonicalization guarantees
292 SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
293 PreserveLCSSA);
294 // Add the branch to the exit block (around the unrolling loop)
295 B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
296 InsertPt->eraseFromParent();
297 if (DT) {
298 auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
299 DT->changeImmediateDominator(Exit, NewDom);
300 }
301
302 // Split the main loop exit to maintain canonicalization guarantees.
303 SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
304 SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
305 PreserveLCSSA);
306}
307
308/// Create a clone of the blocks in a loop and connect them together. A new
309/// loop will be created including all cloned blocks, and the iterator of the
310/// new loop switched to count NewIter down to 0.
311/// The cloned blocks should be inserted between InsertTop and InsertBot.
312/// InsertTop should be new preheader, InsertBot new loop exit.
313/// Returns the new cloned loop that is created.
314static Loop *
315CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
316 const bool UnrollRemainder,
317 BasicBlock *InsertTop,
318 BasicBlock *InsertBot, BasicBlock *Preheader,
319 std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
320 ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
321 StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
322 BasicBlock *Header = L->getHeader();
323 BasicBlock *Latch = L->getLoopLatch();
324 Function *F = Header->getParent();
325 LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
326 LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
327 Loop *ParentLoop = L->getParentLoop();
328 NewLoopsMap NewLoops;
329 NewLoops[ParentLoop] = ParentLoop;
330
331 // For each block in the original loop, create a new copy,
332 // and update the value map with the newly created values.
333 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
334 BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
335 NewBlocks.push_back(NewBB);
336
337 addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
338
339 VMap[*BB] = NewBB;
340 if (Header == *BB) {
341 // For the first block, add a CFG connection to this newly
342 // created block.
343 InsertTop->getTerminator()->setSuccessor(0, NewBB);
344 }
345
346 if (DT) {
347 if (Header == *BB) {
348 // The header is dominated by the preheader.
349 DT->addNewBlock(NewBB, InsertTop);
350 } else {
351 // Copy information from original loop to unrolled loop.
352 BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
353 DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
354 }
355 }
356
357 if (Latch == *BB) {
358 // For the last block, create a loop back to cloned head.
359 VMap.erase((*BB)->getTerminator());
360 // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
361 // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
362 // thus we must compare the post-increment (wrapping) value.
363 BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
364 BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
365 IRBuilder<> Builder(LatchBR);
366 PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
367 suffix + ".iter",
368 FirstLoopBB->getFirstNonPHI());
369 auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
370 auto *One = ConstantInt::get(NewIdx->getType(), 1);
371 Value *IdxNext = Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
372 Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
373 Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
374 NewIdx->addIncoming(Zero, InsertTop);
375 NewIdx->addIncoming(IdxNext, NewBB);
376 LatchBR->eraseFromParent();
377 }
378 }
379
380 // Change the incoming values to the ones defined in the preheader or
381 // cloned loop.
382 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
383 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
384 unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
385 NewPHI->setIncomingBlock(idx, InsertTop);
386 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
387 idx = NewPHI->getBasicBlockIndex(Latch);
388 Value *InVal = NewPHI->getIncomingValue(idx);
389 NewPHI->setIncomingBlock(idx, NewLatch);
390 if (Value *V = VMap.lookup(InVal))
391 NewPHI->setIncomingValue(idx, V);
392 }
393
394 Loop *NewLoop = NewLoops[L];
395 assert(NewLoop && "L should have been cloned");
396 MDNode *LoopID = NewLoop->getLoopID();
397
398 // Only add loop metadata if the loop is not going to be completely
399 // unrolled.
400 if (UnrollRemainder)
401 return NewLoop;
402
403 std::optional<MDNode *> NewLoopID = makeFollowupLoopID(
405 if (NewLoopID) {
406 NewLoop->setLoopID(*NewLoopID);
407
408 // Do not setLoopAlreadyUnrolled if loop attributes have been defined
409 // explicitly.
410 return NewLoop;
411 }
412
413 // Add unroll disable metadata to disable future unrolling for this loop.
414 NewLoop->setLoopAlreadyUnrolled();
415 return NewLoop;
416}
417
418/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
419/// we return true only if UnrollRuntimeMultiExit is set to true.
421 Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
422 bool UseEpilogRemainder) {
423
424 // Priority goes to UnrollRuntimeMultiExit if it's supplied.
425 if (UnrollRuntimeMultiExit.getNumOccurrences())
427
428 // The main pain point with multi-exit loop unrolling is that once unrolled,
429 // we will not be able to merge all blocks into a straight line code.
430 // There are branches within the unrolled loop that go to the OtherExits.
431 // The second point is the increase in code size, but this is true
432 // irrespective of multiple exits.
433
434 // Note: Both the heuristics below are coarse grained. We are essentially
435 // enabling unrolling of loops that have a single side exit other than the
436 // normal LatchExit (i.e. exiting into a deoptimize block).
437 // The heuristics considered are:
438 // 1. low number of branches in the unrolled version.
439 // 2. high predictability of these extra branches.
440 // We avoid unrolling loops that have more than two exiting blocks. This
441 // limits the total number of branches in the unrolled loop to be atmost
442 // the unroll factor (since one of the exiting blocks is the latch block).
443 SmallVector<BasicBlock*, 4> ExitingBlocks;
444 L->getExitingBlocks(ExitingBlocks);
445 if (ExitingBlocks.size() > 2)
446 return false;
447
448 // Allow unrolling of loops with no non latch exit blocks.
449 if (OtherExits.size() == 0)
450 return true;
451
452 // The second heuristic is that L has one exit other than the latchexit and
453 // that exit is a deoptimize block. We know that deoptimize blocks are rarely
454 // taken, which also implies the branch leading to the deoptimize block is
455 // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
456 // assume the other exit branch is predictable even if it has no deoptimize
457 // call.
458 return (OtherExits.size() == 1 &&
460 OtherExits[0]->getTerminatingDeoptimizeCall()));
461 // TODO: These can be fine-tuned further to consider code size or deopt states
462 // that are captured by the deoptimize exit block.
463 // Also, we can extend this to support more cases, if we actually
464 // know of kinds of multiexit loops that would benefit from unrolling.
465}
466
467// Assign the maximum possible trip count as the back edge weight for the
468// remainder loop if the original loop comes with a branch weight.
470 Loop *RemainderLoop,
471 uint64_t UnrollFactor) {
472 uint64_t TrueWeight, FalseWeight;
473 BranchInst *LatchBR =
474 cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
475 if (!extractBranchWeights(*LatchBR, TrueWeight, FalseWeight))
476 return;
477 uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
478 ? FalseWeight
479 : TrueWeight;
480 assert(UnrollFactor > 1);
481 uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
482 BasicBlock *Header = RemainderLoop->getHeader();
483 BasicBlock *Latch = RemainderLoop->getLoopLatch();
484 auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
485 unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
486 MDBuilder MDB(RemainderLatchBR->getContext());
487 MDNode *WeightNode =
488 HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
489 : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
490 RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
491}
492
493/// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
494/// accounting for the possibility of unsigned overflow in the 2s complement
495/// domain. Preconditions:
496/// 1) TripCount = BECount + 1 (allowing overflow)
497/// 2) Log2(Count) <= BitWidth(BECount)
499 Value *TripCount, unsigned Count) {
500 // Note that TripCount is BECount + 1.
501 if (isPowerOf2_32(Count))
502 // If the expression is zero, then either:
503 // 1. There are no iterations to be run in the prolog/epilog loop.
504 // OR
505 // 2. The addition computing TripCount overflowed.
506 //
507 // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
508 // the number of iterations that remain to be run in the original loop is a
509 // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a
510 // precondition of this method).
511 return B.CreateAnd(TripCount, Count - 1, "xtraiter");
512
513 // As (BECount + 1) can potentially unsigned overflow we count
514 // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
515 Constant *CountC = ConstantInt::get(BECount->getType(), Count);
516 Value *ModValTmp = B.CreateURem(BECount, CountC);
517 Value *ModValAdd = B.CreateAdd(ModValTmp,
518 ConstantInt::get(ModValTmp->getType(), 1));
519 // At that point (BECount % Count) + 1 could be equal to Count.
520 // To handle this case we need to take mod by Count one more time.
521 return B.CreateURem(ModValAdd, CountC, "xtraiter");
522}
523
524
525/// Insert code in the prolog/epilog code when unrolling a loop with a
526/// run-time trip-count.
527///
528/// This method assumes that the loop unroll factor is total number
529/// of loop bodies in the loop after unrolling. (Some folks refer
530/// to the unroll factor as the number of *extra* copies added).
531/// We assume also that the loop unroll factor is a power-of-two. So, after
532/// unrolling the loop, the number of loop bodies executed is 2,
533/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
534/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
535/// the switch instruction is generated.
536///
537/// ***Prolog case***
538/// extraiters = tripcount % loopfactor
539/// if (extraiters == 0) jump Loop:
540/// else jump Prol:
541/// Prol: LoopBody;
542/// extraiters -= 1 // Omitted if unroll factor is 2.
543/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
544/// if (tripcount < loopfactor) jump End:
545/// Loop:
546/// ...
547/// End:
548///
549/// ***Epilog case***
550/// extraiters = tripcount % loopfactor
551/// if (tripcount < loopfactor) jump LoopExit:
552/// unroll_iters = tripcount - extraiters
553/// Loop: LoopBody; (executes unroll_iter times);
554/// unroll_iter -= 1
555/// if (unroll_iter != 0) jump Loop:
556/// LoopExit:
557/// if (extraiters == 0) jump EpilExit:
558/// Epil: LoopBody; (executes extraiters times)
559/// extraiters -= 1 // Omitted if unroll factor is 2.
560/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
561/// EpilExit:
562
564 Loop *L, unsigned Count, bool AllowExpensiveTripCount,
565 bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
567 const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
568 LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
569 LLVM_DEBUG(L->dump());
570 LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
571 : dbgs() << "Using prolog remainder.\n");
572
573 // Make sure the loop is in canonical form.
574 if (!L->isLoopSimplifyForm()) {
575 LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
576 return false;
577 }
578
579 // Guaranteed by LoopSimplifyForm.
580 BasicBlock *Latch = L->getLoopLatch();
581 BasicBlock *Header = L->getHeader();
582
583 BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
584
585 if (!LatchBR || LatchBR->isUnconditional()) {
586 // The loop-rotate pass can be helpful to avoid this in many cases.
588 dbgs()
589 << "Loop latch not terminated by a conditional branch.\n");
590 return false;
591 }
592
593 unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
594 BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
595
596 if (L->contains(LatchExit)) {
597 // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
598 // targets of the Latch be an exit block out of the loop.
600 dbgs()
601 << "One of the loop latch successors must be the exit block.\n");
602 return false;
603 }
604
605 // These are exit blocks other than the target of the latch exiting block.
607 L->getUniqueNonLatchExitBlocks(OtherExits);
608 // Support only single exit and exiting block unless multi-exit loop
609 // unrolling is enabled.
610 if (!L->getExitingBlock() || OtherExits.size()) {
611 // We rely on LCSSA form being preserved when the exit blocks are transformed.
612 // (Note that only an off-by-default mode of the old PM disables PreserveLCCA.)
613 if (!PreserveLCSSA)
614 return false;
615
616 if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,
617 UseEpilogRemainder)) {
619 dbgs()
620 << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
621 "enabled!\n");
622 return false;
623 }
624 }
625 // Use Scalar Evolution to compute the trip count. This allows more loops to
626 // be unrolled than relying on induction var simplification.
627 if (!SE)
628 return false;
629
630 // Only unroll loops with a computable trip count.
631 // We calculate the backedge count by using getExitCount on the Latch block,
632 // which is proven to be the only exiting block in this loop. This is same as
633 // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
634 // exiting blocks).
635 const SCEV *BECountSC = SE->getExitCount(L, Latch);
636 if (isa<SCEVCouldNotCompute>(BECountSC)) {
637 LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
638 return false;
639 }
640
641 unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
642
643 // Add 1 since the backedge count doesn't include the first loop iteration.
644 // (Note that overflow can occur, this is handled explicitly below)
645 const SCEV *TripCountSC =
646 SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
647 if (isa<SCEVCouldNotCompute>(TripCountSC)) {
648 LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
649 return false;
650 }
651
652 BasicBlock *PreHeader = L->getLoopPreheader();
653 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
654 const DataLayout &DL = Header->getModule()->getDataLayout();
655 SCEVExpander Expander(*SE, DL, "loop-unroll");
656 if (!AllowExpensiveTripCount &&
657 Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
658 TTI, PreHeaderBR)) {
659 LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
660 return false;
661 }
662
663 // This constraint lets us deal with an overflowing trip count easily; see the
664 // comment on ModVal below.
665 if (Log2_32(Count) > BEWidth) {
667 dbgs()
668 << "Count failed constraint on overflow trip count calculation.\n");
669 return false;
670 }
671
672 // Loop structure is the following:
673 //
674 // PreHeader
675 // Header
676 // ...
677 // Latch
678 // LatchExit
679
680 BasicBlock *NewPreHeader;
681 BasicBlock *NewExit = nullptr;
682 BasicBlock *PrologExit = nullptr;
683 BasicBlock *EpilogPreHeader = nullptr;
684 BasicBlock *PrologPreHeader = nullptr;
685
686 if (UseEpilogRemainder) {
687 // If epilog remainder
688 // Split PreHeader to insert a branch around loop for unrolling.
689 NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
690 NewPreHeader->setName(PreHeader->getName() + ".new");
691 // Split LatchExit to create phi nodes from branch above.
692 NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,
693 nullptr, PreserveLCSSA);
694 // NewExit gets its DebugLoc from LatchExit, which is not part of the
695 // original Loop.
696 // Fix this by setting Loop's DebugLoc to NewExit.
697 auto *NewExitTerminator = NewExit->getTerminator();
698 NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
699 // Split NewExit to insert epilog remainder loop.
700 EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
701 EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
702
703 // If the latch exits from multiple level of nested loops, then
704 // by assumption there must be another loop exit which branches to the
705 // outer loop and we must adjust the loop for the newly inserted blocks
706 // to account for the fact that our epilogue is still in the same outer
707 // loop. Note that this leaves loopinfo temporarily out of sync with the
708 // CFG until the actual epilogue loop is inserted.
709 if (auto *ParentL = L->getParentLoop())
710 if (LI->getLoopFor(LatchExit) != ParentL) {
711 LI->removeBlock(NewExit);
712 ParentL->addBasicBlockToLoop(NewExit, *LI);
713 LI->removeBlock(EpilogPreHeader);
714 ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
715 }
716
717 } else {
718 // If prolog remainder
719 // Split the original preheader twice to insert prolog remainder loop
720 PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
721 PrologPreHeader->setName(Header->getName() + ".prol.preheader");
722 PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
723 DT, LI);
724 PrologExit->setName(Header->getName() + ".prol.loopexit");
725 // Split PrologExit to get NewPreHeader.
726 NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
727 NewPreHeader->setName(PreHeader->getName() + ".new");
728 }
729 // Loop structure should be the following:
730 // Epilog Prolog
731 //
732 // PreHeader PreHeader
733 // *NewPreHeader *PrologPreHeader
734 // Header *PrologExit
735 // ... *NewPreHeader
736 // Latch Header
737 // *NewExit ...
738 // *EpilogPreHeader Latch
739 // LatchExit LatchExit
740
741 // Calculate conditions for branch around loop for unrolling
742 // in epilog case and around prolog remainder loop in prolog case.
743 // Compute the number of extra iterations required, which is:
744 // extra iterations = run-time trip count % loop unroll factor
745 PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
746 IRBuilder<> B(PreHeaderBR);
747 Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
748 PreHeaderBR);
749 Value *BECount;
750 // If there are other exits before the latch, that may cause the latch exit
751 // branch to never be executed, and the latch exit count may be poison.
752 // In this case, freeze the TripCount and base BECount on the frozen
753 // TripCount. We will introduce two branches using these values, and it's
754 // important that they see a consistent value (which would not be guaranteed
755 // if were frozen independently.)
756 if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) &&
757 !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) {
758 TripCount = B.CreateFreeze(TripCount);
759 BECount =
760 B.CreateAdd(TripCount, ConstantInt::get(TripCount->getType(), -1));
761 } else {
762 // If we don't need to freeze, use SCEVExpander for BECount as well, to
763 // allow slightly better value reuse.
764 BECount =
765 Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR);
766 }
767
768 Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);
769
770 Value *BranchVal =
771 UseEpilogRemainder ? B.CreateICmpULT(BECount,
772 ConstantInt::get(BECount->getType(),
773 Count - 1)) :
774 B.CreateIsNotNull(ModVal, "lcmp.mod");
775 BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
776 BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
777 // Branch to either remainder (extra iterations) loop or unrolling loop.
778 B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
779 PreHeaderBR->eraseFromParent();
780 if (DT) {
781 if (UseEpilogRemainder)
782 DT->changeImmediateDominator(NewExit, PreHeader);
783 else
784 DT->changeImmediateDominator(PrologExit, PreHeader);
785 }
786 Function *F = Header->getParent();
787 // Get an ordered list of blocks in the loop to help with the ordering of the
788 // cloned blocks in the prolog/epilog code
789 LoopBlocksDFS LoopBlocks(L);
790 LoopBlocks.perform(LI);
791
792 //
793 // For each extra loop iteration, create a copy of the loop's basic blocks
794 // and generate a condition that branches to the copy depending on the
795 // number of 'left over' iterations.
796 //
797 std::vector<BasicBlock *> NewBlocks;
799
800 // Clone all the basic blocks in the loop. If Count is 2, we don't clone
801 // the loop, otherwise we create a cloned loop to execute the extra
802 // iterations. This function adds the appropriate CFG connections.
803 BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
804 BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
805 Loop *remainderLoop = CloneLoopBlocks(
806 L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
807 NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
808
809 // Assign the maximum possible trip count as the back edge weight for the
810 // remainder loop if the original loop comes with a branch weight.
811 if (remainderLoop && !UnrollRemainder)
812 updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
813
814 // Insert the cloned blocks into the function.
815 F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
816
817 // Now the loop blocks are cloned and the other exiting blocks from the
818 // remainder are connected to the original Loop's exit blocks. The remaining
819 // work is to update the phi nodes in the original loop, and take in the
820 // values from the cloned region.
821 for (auto *BB : OtherExits) {
822 // Given we preserve LCSSA form, we know that the values used outside the
823 // loop will be used through these phi nodes at the exit blocks that are
824 // transformed below.
825 for (PHINode &PN : BB->phis()) {
826 unsigned oldNumOperands = PN.getNumIncomingValues();
827 // Add the incoming values from the remainder code to the end of the phi
828 // node.
829 for (unsigned i = 0; i < oldNumOperands; i++){
830 auto *PredBB =PN.getIncomingBlock(i);
831 if (PredBB == Latch)
832 // The latch exit is handled seperately, see connectX
833 continue;
834 if (!L->contains(PredBB))
835 // Even if we had dedicated exits, the code above inserted an
836 // extra branch which can reach the latch exit.
837 continue;
838
839 auto *V = PN.getIncomingValue(i);
840 if (Instruction *I = dyn_cast<Instruction>(V))
841 if (L->contains(I))
842 V = VMap.lookup(I);
843 PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB]));
844 }
845 }
846#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
847 for (BasicBlock *SuccBB : successors(BB)) {
848 assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) &&
849 "Breaks the definition of dedicated exits!");
850 }
851#endif
852 }
853
854 // Update the immediate dominator of the exit blocks and blocks that are
855 // reachable from the exit blocks. This is needed because we now have paths
856 // from both the original loop and the remainder code reaching the exit
857 // blocks. While the IDom of these exit blocks were from the original loop,
858 // now the IDom is the preheader (which decides whether the original loop or
859 // remainder code should run).
860 if (DT && !L->getExitingBlock()) {
861 SmallVector<BasicBlock *, 16> ChildrenToUpdate;
862 // NB! We have to examine the dom children of all loop blocks, not just
863 // those which are the IDom of the exit blocks. This is because blocks
864 // reachable from the exit blocks can have their IDom as the nearest common
865 // dominator of the exit blocks.
866 for (auto *BB : L->blocks()) {
867 auto *DomNodeBB = DT->getNode(BB);
868 for (auto *DomChild : DomNodeBB->children()) {
869 auto *DomChildBB = DomChild->getBlock();
870 if (!L->contains(LI->getLoopFor(DomChildBB)))
871 ChildrenToUpdate.push_back(DomChildBB);
872 }
873 }
874 for (auto *BB : ChildrenToUpdate)
875 DT->changeImmediateDominator(BB, PreHeader);
876 }
877
878 // Loop structure should be the following:
879 // Epilog Prolog
880 //
881 // PreHeader PreHeader
882 // NewPreHeader PrologPreHeader
883 // Header PrologHeader
884 // ... ...
885 // Latch PrologLatch
886 // NewExit PrologExit
887 // EpilogPreHeader NewPreHeader
888 // EpilogHeader Header
889 // ... ...
890 // EpilogLatch Latch
891 // LatchExit LatchExit
892
893 // Rewrite the cloned instruction operands to use the values created when the
894 // clone is created.
895 for (BasicBlock *BB : NewBlocks) {
896 for (Instruction &I : *BB) {
897 RemapInstruction(&I, VMap,
899 }
900 }
901
902 if (UseEpilogRemainder) {
903 // Connect the epilog code to the original loop and update the
904 // PHI functions.
905 ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
906 NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
907
908 // Update counter in loop for unrolling.
909 // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
910 // Subtle: TestVal can be 0 if we wrapped when computing the trip count,
911 // thus we must compare the post-increment (wrapping) value.
912 IRBuilder<> B2(NewPreHeader->getTerminator());
913 Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
914 BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
915 PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
916 Header->getFirstNonPHI());
917 B2.SetInsertPoint(LatchBR);
918 auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
919 auto *One = ConstantInt::get(NewIdx->getType(), 1);
920 Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
921 auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
922 Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
923 NewIdx->addIncoming(Zero, NewPreHeader);
924 NewIdx->addIncoming(IdxNext, Latch);
925 LatchBR->setCondition(IdxCmp);
926 } else {
927 // Connect the prolog code to the original loop and update the
928 // PHI functions.
929 ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
930 NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
931 }
932
933 // If this loop is nested, then the loop unroller changes the code in the any
934 // of its parent loops, so the Scalar Evolution pass needs to be run again.
935 SE->forgetTopmostLoop(L);
936
937 // Verify that the Dom Tree and Loop Info are correct.
938#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
939 if (DT) {
940 assert(DT->verify(DominatorTree::VerificationLevel::Full));
941 LI->verify(*DT);
942 }
943#endif
944
945 // For unroll factor 2 remainder loop will have 1 iteration.
946 if (Count == 2 && DT && LI && SE) {
947 // TODO: This code could probably be pulled out into a helper function
948 // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
949 BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
950 assert(RemainderLatch);
951 SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),
952 remainderLoop->getBlocks().end());
953 breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
954 remainderLoop = nullptr;
955
956 // Simplify loop values after breaking the backedge
957 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
959 for (BasicBlock *BB : RemainderBlocks) {
960 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
961 if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
962 if (LI->replacementPreservesLCSSAForm(&Inst, V))
963 Inst.replaceAllUsesWith(V);
965 DeadInsts.emplace_back(&Inst);
966 }
967 // We can't do recursive deletion until we're done iterating, as we might
968 // have a phi which (potentially indirectly) uses instructions later in
969 // the block we're iterating through.
971 }
972
973 // Merge latch into exit block.
974 auto *ExitBB = RemainderLatch->getSingleSuccessor();
975 assert(ExitBB && "required after breaking cond br backedge");
976 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
977 MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
978 }
979
980 // Canonicalize to LoopSimplifyForm both original and remainder loops. We
981 // cannot rely on the LoopUnrollPass to do this because it only does
982 // canonicalization for parent/subloops and not the sibling loops.
983 if (OtherExits.size() > 0) {
984 // Generate dedicated exit blocks for the original loop, to preserve
985 // LoopSimplifyForm.
986 formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
987 // Generate dedicated exit blocks for the remainder loop if one exists, to
988 // preserve LoopSimplifyForm.
989 if (remainderLoop)
990 formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
991 }
992
993 auto UnrollResult = LoopUnrollResult::Unmodified;
994 if (remainderLoop && UnrollRemainder) {
995 LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
996 UnrollResult =
997 UnrollLoop(remainderLoop,
998 {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
999 /*AllowExpensiveTripCount*/ false,
1000 /*UnrollRemainder*/ false, ForgetAllSCEV},
1001 LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
1002 }
1003
1004 if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
1005 *ResultLoop = remainderLoop;
1006 NumRuntimeUnrolled++;
1007 return true;
1008}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
assume Assume Builder
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static bool canProfitablyUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock * > &OtherExits, BasicBlock *LatchExit, bool UseEpilogRemainder)
Returns true if we can profitably unroll the multi-exit loop L.
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI)
Create a clone of the blocks in a loop and connect them together.
static Value * CreateTripRemainder(IRBuilder<> &B, Value *BECount, Value *TripCount, unsigned Count)
Calculate ModVal = (BECount + 1) % Count on the abstract integer domain accounting for the possibilit...
static cl::opt< bool > UnrollRuntimeOtherExitPredictable("unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, cl::desc("Assume the non latch exit block to be predictable"))
static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, Loop *RemainderLoop, uint64_t UnrollFactor)
static cl::opt< bool > UnrollRuntimeMultiExit("unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated"))
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE)
Connect the unrolling epilog code to the original loop.
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE)
Connect the unrolling prolog code to the original loop.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:372
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:208
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:314
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:344
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1267
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1250
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2240
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2558
const BasicBlock * getParent() const
Definition: Instruction.h:90
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:355
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
BlockT * getHeader() const
Definition: LoopInfo.h:105
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:188
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1222
RPOIterator endRPO() const
Definition: LoopIterator.h:140
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:699
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:1140
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:537
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:943
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getConstant(ConstantInt *V)
bool loopHasNoAbnormalExits(const Loop *L)
Return true if the loop has no abnormal exits.
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1731
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:164
bool erase(const KeyT &Val)
Definition: ValueMap.h:190
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:375
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
self_iterator getIterator()
Definition: ilist_node.h:82
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:537
auto successors(const MachineBasicBlock *BB)
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:382
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:89
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:71
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:256
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
Definition: LoopUtils.cpp:699
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:42
TargetTransformInfo TTI
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:45
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Definition: LoopUnroll.cpp:147
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1976
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:269
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.