LLVM  12.0.0git
LoopUnrollRuntime.cpp
Go to the documentation of this file.
1 //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements some loop unrolling utilities for loops with run-time
10 // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
11 // trip counts.
12 //
13 // The functions in this file are used to generate extra code when the
14 // run-time trip count modulo the unroll factor is not 0. When this is the
15 // case, we need to generate code to execute these 'left over' iterations.
16 //
17 // The current strategy generates an if-then-else sequence prior to the
18 // unrolled loop to execute the 'left over' iterations before or after the
19 // unrolled loop.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/Statistic.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/Module.h"
34 #include "llvm/Support/Debug.h"
36 #include "llvm/Transforms/Utils.h"
42 #include <algorithm>
43 
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "loop-unroll"
47 
48 STATISTIC(NumRuntimeUnrolled,
49  "Number of loops unrolled with run-time trip counts");
51  "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
52  cl::desc("Allow runtime unrolling for loops with multiple exits, when "
53  "epilog is generated"));
54 
55 /// Connect the unrolling prolog code to the original loop.
56 /// The unrolling prolog code contains code to execute the
57 /// 'extra' iterations if the run-time trip count modulo the
58 /// unroll count is non-zero.
59 ///
60 /// This function performs the following:
61 /// - Create PHI nodes at prolog end block to combine values
62 /// that exit the prolog code and jump around the prolog.
63 /// - Add a PHI operand to a PHI node at the loop exit block
64 /// for values that exit the prolog and go around the loop.
65 /// - Branch around the original loop if the trip count is less
66 /// than the unroll factor.
67 ///
68 static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
69  BasicBlock *PrologExit,
70  BasicBlock *OriginalLoopLatchExit,
71  BasicBlock *PreHeader, BasicBlock *NewPreHeader,
73  LoopInfo *LI, bool PreserveLCSSA) {
74  // Loop structure should be the following:
75  // Preheader
76  // PrologHeader
77  // ...
78  // PrologLatch
79  // PrologExit
80  // NewPreheader
81  // Header
82  // ...
83  // Latch
84  // LatchExit
85  BasicBlock *Latch = L->getLoopLatch();
86  assert(Latch && "Loop must have a latch");
87  BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
88 
89  // Create a PHI node for each outgoing value from the original loop
90  // (which means it is an outgoing value from the prolog code too).
91  // The new PHI node is inserted in the prolog end basic block.
92  // The new PHI node value is added as an operand of a PHI node in either
93  // the loop header or the loop exit block.
94  for (BasicBlock *Succ : successors(Latch)) {
95  for (PHINode &PN : Succ->phis()) {
96  // Add a new PHI node to the prolog end block and add the
97  // appropriate incoming values.
98  // TODO: This code assumes that the PrologExit (or the LatchExit block for
99  // prolog loop) contains only one predecessor from the loop, i.e. the
100  // PrologLatch. When supporting multiple-exiting block loops, we can have
101  // two or more blocks that have the LatchExit as the target in the
102  // original loop.
103  PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
104  PrologExit->getFirstNonPHI());
105  // Adding a value to the new PHI node from the original loop preheader.
106  // This is the value that skips all the prolog code.
107  if (L->contains(&PN)) {
108  // Succ is loop header.
109  NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
110  PreHeader);
111  } else {
112  // Succ is LatchExit.
113  NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
114  }
115 
116  Value *V = PN.getIncomingValueForBlock(Latch);
117  if (Instruction *I = dyn_cast<Instruction>(V)) {
118  if (L->contains(I)) {
119  V = VMap.lookup(I);
120  }
121  }
122  // Adding a value to the new PHI node from the last prolog block
123  // that was created.
124  NewPN->addIncoming(V, PrologLatch);
125 
126  // Update the existing PHI node operand with the value from the
127  // new PHI node. How this is done depends on if the existing
128  // PHI node is in the original loop block, or the exit block.
129  if (L->contains(&PN))
130  PN.setIncomingValueForBlock(NewPreHeader, NewPN);
131  else
132  PN.addIncoming(NewPN, PrologExit);
133  }
134  }
135 
136  // Make sure that created prolog loop is in simplified form
137  SmallVector<BasicBlock *, 4> PrologExitPreds;
138  Loop *PrologLoop = LI->getLoopFor(PrologLatch);
139  if (PrologLoop) {
140  for (BasicBlock *PredBB : predecessors(PrologExit))
141  if (PrologLoop->contains(PredBB))
142  PrologExitPreds.push_back(PredBB);
143 
144  SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
145  nullptr, PreserveLCSSA);
146  }
147 
148  // Create a branch around the original loop, which is taken if there are no
149  // iterations remaining to be executed after running the prologue.
150  Instruction *InsertPt = PrologExit->getTerminator();
151  IRBuilder<> B(InsertPt);
152 
153  assert(Count != 0 && "nonsensical Count!");
154 
155  // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
156  // This means %xtraiter is (BECount + 1) and all of the iterations of this
157  // loop were executed by the prologue. Note that if BECount <u (Count - 1)
158  // then (BECount + 1) cannot unsigned-overflow.
159  Value *BrLoopExit =
160  B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
161  // Split the exit to maintain loop canonicalization guarantees
162  SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
163  SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
164  nullptr, PreserveLCSSA);
165  // Add the branch to the exit block (around the unrolled loop)
166  B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
167  InsertPt->eraseFromParent();
168  if (DT)
169  DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
170 }
171 
172 /// Connect the unrolling epilog code to the original loop.
173 /// The unrolling epilog code contains code to execute the
174 /// 'extra' iterations if the run-time trip count modulo the
175 /// unroll count is non-zero.
176 ///
177 /// This function performs the following:
178 /// - Update PHI nodes at the unrolling loop exit and epilog loop exit
179 /// - Create PHI nodes at the unrolling loop exit to combine
180 /// values that exit the unrolling loop code and jump around it.
181 /// - Update PHI operands in the epilog loop by the new PHI nodes
182 /// - Branch around the epilog loop if extra iters (ModVal) is zero.
183 ///
184 static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
185  BasicBlock *Exit, BasicBlock *PreHeader,
186  BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
187  ValueToValueMapTy &VMap, DominatorTree *DT,
188  LoopInfo *LI, bool PreserveLCSSA) {
189  BasicBlock *Latch = L->getLoopLatch();
190  assert(Latch && "Loop must have a latch");
191  BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
192 
193  // Loop structure should be the following:
194  //
195  // PreHeader
196  // NewPreHeader
197  // Header
198  // ...
199  // Latch
200  // NewExit (PN)
201  // EpilogPreHeader
202  // EpilogHeader
203  // ...
204  // EpilogLatch
205  // Exit (EpilogPN)
206 
207  // Update PHI nodes at NewExit and Exit.
208  for (PHINode &PN : NewExit->phis()) {
209  // PN should be used in another PHI located in Exit block as
210  // Exit was split by SplitBlockPredecessors into Exit and NewExit
211  // Basicaly it should look like:
212  // NewExit:
213  // PN = PHI [I, Latch]
214  // ...
215  // Exit:
216  // EpilogPN = PHI [PN, EpilogPreHeader]
217  //
218  // There is EpilogPreHeader incoming block instead of NewExit as
219  // NewExit was spilt 1 more time to get EpilogPreHeader.
220  assert(PN.hasOneUse() && "The phi should have 1 use");
221  PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
222  assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
223 
224  // Add incoming PreHeader from branch around the Loop
225  PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
226 
227  Value *V = PN.getIncomingValueForBlock(Latch);
229  if (I && L->contains(I))
230  // If value comes from an instruction in the loop add VMap value.
231  V = VMap.lookup(I);
232  // For the instruction out of the loop, constant or undefined value
233  // insert value itself.
234  EpilogPN->addIncoming(V, EpilogLatch);
235 
236  assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
237  "EpilogPN should have EpilogPreHeader incoming block");
238  // Change EpilogPreHeader incoming block to NewExit.
239  EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
240  NewExit);
241  // Now PHIs should look like:
242  // NewExit:
243  // PN = PHI [I, Latch], [undef, PreHeader]
244  // ...
245  // Exit:
246  // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
247  }
248 
249  // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
250  // Update corresponding PHI nodes in epilog loop.
251  for (BasicBlock *Succ : successors(Latch)) {
252  // Skip this as we already updated phis in exit blocks.
253  if (!L->contains(Succ))
254  continue;
255  for (PHINode &PN : Succ->phis()) {
256  // Add new PHI nodes to the loop exit block and update epilog
257  // PHIs with the new PHI values.
258  PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
259  NewExit->getFirstNonPHI());
260  // Adding a value to the new PHI node from the unrolling loop preheader.
261  NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
262  // Adding a value to the new PHI node from the unrolling loop latch.
263  NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
264 
265  // Update the existing PHI node operand with the value from the new PHI
266  // node. Corresponding instruction in epilog loop should be PHI.
267  PHINode *VPN = cast<PHINode>(VMap[&PN]);
268  VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
269  }
270  }
271 
272  Instruction *InsertPt = NewExit->getTerminator();
273  IRBuilder<> B(InsertPt);
274  Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
275  assert(Exit && "Loop must have a single exit block only");
276  // Split the epilogue exit to maintain loop canonicalization guarantees
278  SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
279  PreserveLCSSA);
280  // Add the branch to the exit block (around the unrolling loop)
281  B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
282  InsertPt->eraseFromParent();
283  if (DT)
284  DT->changeImmediateDominator(Exit, NewExit);
285 
286  // Split the main loop exit to maintain canonicalization guarantees.
287  SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
288  SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
289  PreserveLCSSA);
290 }
291 
292 /// Create a clone of the blocks in a loop and connect them together.
293 /// If CreateRemainderLoop is false, loop structure will not be cloned,
294 /// otherwise a new loop will be created including all cloned blocks, and the
295 /// iterator of it switches to count NewIter down to 0.
296 /// The cloned blocks should be inserted between InsertTop and InsertBot.
297 /// If loop structure is cloned InsertTop should be new preheader, InsertBot
298 /// new loop exit.
299 /// Return the new cloned loop that is created when CreateRemainderLoop is true.
300 static Loop *
301 CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
302  const bool UseEpilogRemainder, const bool UnrollRemainder,
303  BasicBlock *InsertTop,
304  BasicBlock *InsertBot, BasicBlock *Preheader,
305  std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
306  ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
307  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
308  BasicBlock *Header = L->getHeader();
309  BasicBlock *Latch = L->getLoopLatch();
310  Function *F = Header->getParent();
311  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
312  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
313  Loop *ParentLoop = L->getParentLoop();
314  NewLoopsMap NewLoops;
315  NewLoops[ParentLoop] = ParentLoop;
316  if (!CreateRemainderLoop)
317  NewLoops[L] = ParentLoop;
318 
319  // For each block in the original loop, create a new copy,
320  // and update the value map with the newly created values.
321  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
322  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
323  NewBlocks.push_back(NewBB);
324 
325  // If we're unrolling the outermost loop, there's no remainder loop,
326  // and this block isn't in a nested loop, then the new block is not
327  // in any loop. Otherwise, add it to loopinfo.
328  if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
329  addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
330 
331  VMap[*BB] = NewBB;
332  if (Header == *BB) {
333  // For the first block, add a CFG connection to this newly
334  // created block.
335  InsertTop->getTerminator()->setSuccessor(0, NewBB);
336  }
337 
338  if (DT) {
339  if (Header == *BB) {
340  // The header is dominated by the preheader.
341  DT->addNewBlock(NewBB, InsertTop);
342  } else {
343  // Copy information from original loop to unrolled loop.
344  BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
345  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
346  }
347  }
348 
349  if (Latch == *BB) {
350  // For the last block, if CreateRemainderLoop is false, create a direct
351  // jump to InsertBot. If not, create a loop back to cloned head.
352  VMap.erase((*BB)->getTerminator());
353  BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
354  BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
355  IRBuilder<> Builder(LatchBR);
356  if (!CreateRemainderLoop) {
357  Builder.CreateBr(InsertBot);
358  } else {
359  PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
360  suffix + ".iter",
361  FirstLoopBB->getFirstNonPHI());
362  Value *IdxSub =
363  Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
364  NewIdx->getName() + ".sub");
365  Value *IdxCmp =
366  Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
367  Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
368  NewIdx->addIncoming(NewIter, InsertTop);
369  NewIdx->addIncoming(IdxSub, NewBB);
370  }
371  LatchBR->eraseFromParent();
372  }
373  }
374 
375  // Change the incoming values to the ones defined in the preheader or
376  // cloned loop.
377  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
378  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
379  if (!CreateRemainderLoop) {
380  if (UseEpilogRemainder) {
381  unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
382  NewPHI->setIncomingBlock(idx, InsertTop);
383  NewPHI->removeIncomingValue(Latch, false);
384  } else {
385  VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
386  cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
387  }
388  } else {
389  unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
390  NewPHI->setIncomingBlock(idx, InsertTop);
391  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
392  idx = NewPHI->getBasicBlockIndex(Latch);
393  Value *InVal = NewPHI->getIncomingValue(idx);
394  NewPHI->setIncomingBlock(idx, NewLatch);
395  if (Value *V = VMap.lookup(InVal))
396  NewPHI->setIncomingValue(idx, V);
397  }
398  }
399  if (CreateRemainderLoop) {
400  Loop *NewLoop = NewLoops[L];
401  assert(NewLoop && "L should have been cloned");
402  MDNode *LoopID = NewLoop->getLoopID();
403 
404  // Only add loop metadata if the loop is not going to be completely
405  // unrolled.
406  if (UnrollRemainder)
407  return NewLoop;
408 
411  if (NewLoopID.hasValue()) {
412  NewLoop->setLoopID(NewLoopID.getValue());
413 
414  // Do not setLoopAlreadyUnrolled if loop attributes have been defined
415  // explicitly.
416  return NewLoop;
417  }
418 
419  // Add unroll disable metadata to disable future unrolling for this loop.
420  NewLoop->setLoopAlreadyUnrolled();
421  return NewLoop;
422  }
423  else
424  return nullptr;
425 }
426 
427 /// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
428 /// is populated with all the loop exit blocks other than the LatchExit block.
429 static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
430  bool PreserveLCSSA,
431  bool UseEpilogRemainder) {
432 
433  // We currently have some correctness constrains in unrolling a multi-exit
434  // loop. Check for these below.
435 
436  // We rely on LCSSA form being preserved when the exit blocks are transformed.
437  if (!PreserveLCSSA)
438  return false;
439 
440  // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
441  // UnrollRuntimeMultiExit is true. This will need updating the logic in
442  // connectEpilog/connectProlog.
443  if (!LatchExit->getSinglePredecessor()) {
444  LLVM_DEBUG(
445  dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
446  "predecessor.\n");
447  return false;
448  }
449  // FIXME: We bail out of multi-exit unrolling when epilog loop is generated
450  // and L is an inner loop. This is because in presence of multiple exits, the
451  // outer loop is incorrect: we do not add the EpilogPreheader and exit to the
452  // outer loop. This is automatically handled in the prolog case, so we do not
453  // have that bug in prolog generation.
454  if (UseEpilogRemainder && L->getParentLoop())
455  return false;
456 
457  // All constraints have been satisfied.
458  return true;
459 }
460 
461 /// Returns true if we can profitably unroll the multi-exit loop L. Currently,
462 /// we return true only if UnrollRuntimeMultiExit is set to true.
464  Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
465  bool PreserveLCSSA, bool UseEpilogRemainder) {
466 
467 #if !defined(NDEBUG)
468  assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
469  UseEpilogRemainder) &&
470  "Should be safe to unroll before checking profitability!");
471 #endif
472 
473  // Priority goes to UnrollRuntimeMultiExit if it's supplied.
474  if (UnrollRuntimeMultiExit.getNumOccurrences())
475  return UnrollRuntimeMultiExit;
476 
477  // The main pain point with multi-exit loop unrolling is that once unrolled,
478  // we will not be able to merge all blocks into a straight line code.
479  // There are branches within the unrolled loop that go to the OtherExits.
480  // The second point is the increase in code size, but this is true
481  // irrespective of multiple exits.
482 
483  // Note: Both the heuristics below are coarse grained. We are essentially
484  // enabling unrolling of loops that have a single side exit other than the
485  // normal LatchExit (i.e. exiting into a deoptimize block).
486  // The heuristics considered are:
487  // 1. low number of branches in the unrolled version.
488  // 2. high predictability of these extra branches.
489  // We avoid unrolling loops that have more than two exiting blocks. This
490  // limits the total number of branches in the unrolled loop to be atmost
491  // the unroll factor (since one of the exiting blocks is the latch block).
492  SmallVector<BasicBlock*, 4> ExitingBlocks;
493  L->getExitingBlocks(ExitingBlocks);
494  if (ExitingBlocks.size() > 2)
495  return false;
496 
497  // The second heuristic is that L has one exit other than the latchexit and
498  // that exit is a deoptimize block. We know that deoptimize blocks are rarely
499  // taken, which also implies the branch leading to the deoptimize block is
500  // highly predictable.
501  return (OtherExits.size() == 1 &&
502  OtherExits[0]->getTerminatingDeoptimizeCall());
503  // TODO: These can be fine-tuned further to consider code size or deopt states
504  // that are captured by the deoptimize exit block.
505  // Also, we can extend this to support more cases, if we actually
506  // know of kinds of multiexit loops that would benefit from unrolling.
507 }
508 
509 // Assign the maximum possible trip count as the back edge weight for the
510 // remainder loop if the original loop comes with a branch weight.
512  Loop *RemainderLoop,
513  uint64_t UnrollFactor) {
514  uint64_t TrueWeight, FalseWeight;
515  BranchInst *LatchBR =
516  cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
517  if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
518  uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
519  ? FalseWeight
520  : TrueWeight;
521  assert(UnrollFactor > 1);
522  uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
523  BasicBlock *Header = RemainderLoop->getHeader();
524  BasicBlock *Latch = RemainderLoop->getLoopLatch();
525  auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
526  unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
527  MDBuilder MDB(RemainderLatchBR->getContext());
528  MDNode *WeightNode =
529  HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
530  : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
531  RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
532  }
533 }
534 
535 /// Insert code in the prolog/epilog code when unrolling a loop with a
536 /// run-time trip-count.
537 ///
538 /// This method assumes that the loop unroll factor is total number
539 /// of loop bodies in the loop after unrolling. (Some folks refer
540 /// to the unroll factor as the number of *extra* copies added).
541 /// We assume also that the loop unroll factor is a power-of-two. So, after
542 /// unrolling the loop, the number of loop bodies executed is 2,
543 /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
544 /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
545 /// the switch instruction is generated.
546 ///
547 /// ***Prolog case***
548 /// extraiters = tripcount % loopfactor
549 /// if (extraiters == 0) jump Loop:
550 /// else jump Prol:
551 /// Prol: LoopBody;
552 /// extraiters -= 1 // Omitted if unroll factor is 2.
553 /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
554 /// if (tripcount < loopfactor) jump End:
555 /// Loop:
556 /// ...
557 /// End:
558 ///
559 /// ***Epilog case***
560 /// extraiters = tripcount % loopfactor
561 /// if (tripcount < loopfactor) jump LoopExit:
562 /// unroll_iters = tripcount - extraiters
563 /// Loop: LoopBody; (executes unroll_iter times);
564 /// unroll_iter -= 1
565 /// if (unroll_iter != 0) jump Loop:
566 /// LoopExit:
567 /// if (extraiters == 0) jump EpilExit:
568 /// Epil: LoopBody; (executes extraiters times)
569 /// extraiters -= 1 // Omitted if unroll factor is 2.
570 /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
571 /// EpilExit:
572 
574  Loop *L, unsigned Count, bool AllowExpensiveTripCount,
575  bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
577  const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
578  LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
579  LLVM_DEBUG(L->dump());
580  LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
581  : dbgs() << "Using prolog remainder.\n");
582 
583  // Make sure the loop is in canonical form.
584  if (!L->isLoopSimplifyForm()) {
585  LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
586  return false;
587  }
588 
589  // Guaranteed by LoopSimplifyForm.
590  BasicBlock *Latch = L->getLoopLatch();
591  BasicBlock *Header = L->getHeader();
592 
593  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
594 
595  if (!LatchBR || LatchBR->isUnconditional()) {
596  // The loop-rotate pass can be helpful to avoid this in many cases.
597  LLVM_DEBUG(
598  dbgs()
599  << "Loop latch not terminated by a conditional branch.\n");
600  return false;
601  }
602 
603  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
604  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
605 
606  if (L->contains(LatchExit)) {
607  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
608  // targets of the Latch be an exit block out of the loop.
609  LLVM_DEBUG(
610  dbgs()
611  << "One of the loop latch successors must be the exit block.\n");
612  return false;
613  }
614 
615  // These are exit blocks other than the target of the latch exiting block.
616  SmallVector<BasicBlock *, 4> OtherExits;
617  L->getUniqueNonLatchExitBlocks(OtherExits);
618  bool isMultiExitUnrollingEnabled =
619  canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
620  UseEpilogRemainder) &&
621  canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
622  UseEpilogRemainder);
623  // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
624  if (!isMultiExitUnrollingEnabled &&
625  (!L->getExitingBlock() || OtherExits.size())) {
626  LLVM_DEBUG(
627  dbgs()
628  << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
629  "enabled!\n");
630  return false;
631  }
632  // Use Scalar Evolution to compute the trip count. This allows more loops to
633  // be unrolled than relying on induction var simplification.
634  if (!SE)
635  return false;
636 
637  // Only unroll loops with a computable trip count, and the trip count needs
638  // to be an int value (allowing a pointer type is a TODO item).
639  // We calculate the backedge count by using getExitCount on the Latch block,
640  // which is proven to be the only exiting block in this loop. This is same as
641  // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
642  // exiting blocks).
643  const SCEV *BECountSC = SE->getExitCount(L, Latch);
644  if (isa<SCEVCouldNotCompute>(BECountSC) ||
645  !BECountSC->getType()->isIntegerTy()) {
646  LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
647  return false;
648  }
649 
650  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
651 
652  // Add 1 since the backedge count doesn't include the first loop iteration.
653  const SCEV *TripCountSC =
654  SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
655  if (isa<SCEVCouldNotCompute>(TripCountSC)) {
656  LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
657  return false;
658  }
659 
660  BasicBlock *PreHeader = L->getLoopPreheader();
661  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
662  const DataLayout &DL = Header->getModule()->getDataLayout();
663  SCEVExpander Expander(*SE, DL, "loop-unroll");
664  if (!AllowExpensiveTripCount &&
665  Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
666  TTI, PreHeaderBR)) {
667  LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
668  return false;
669  }
670 
671  // This constraint lets us deal with an overflowing trip count easily; see the
672  // comment on ModVal below.
673  if (Log2_32(Count) > BEWidth) {
674  LLVM_DEBUG(
675  dbgs()
676  << "Count failed constraint on overflow trip count calculation.\n");
677  return false;
678  }
679 
680  // Loop structure is the following:
681  //
682  // PreHeader
683  // Header
684  // ...
685  // Latch
686  // LatchExit
687 
688  BasicBlock *NewPreHeader;
689  BasicBlock *NewExit = nullptr;
690  BasicBlock *PrologExit = nullptr;
691  BasicBlock *EpilogPreHeader = nullptr;
692  BasicBlock *PrologPreHeader = nullptr;
693 
694  if (UseEpilogRemainder) {
695  // If epilog remainder
696  // Split PreHeader to insert a branch around loop for unrolling.
697  NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
698  NewPreHeader->setName(PreHeader->getName() + ".new");
699  // Split LatchExit to create phi nodes from branch above.
700  SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
701  NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
702  nullptr, PreserveLCSSA);
703  // NewExit gets its DebugLoc from LatchExit, which is not part of the
704  // original Loop.
705  // Fix this by setting Loop's DebugLoc to NewExit.
706  auto *NewExitTerminator = NewExit->getTerminator();
707  NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
708  // Split NewExit to insert epilog remainder loop.
709  EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
710  EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
711  } else {
712  // If prolog remainder
713  // Split the original preheader twice to insert prolog remainder loop
714  PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
715  PrologPreHeader->setName(Header->getName() + ".prol.preheader");
716  PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
717  DT, LI);
718  PrologExit->setName(Header->getName() + ".prol.loopexit");
719  // Split PrologExit to get NewPreHeader.
720  NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
721  NewPreHeader->setName(PreHeader->getName() + ".new");
722  }
723  // Loop structure should be the following:
724  // Epilog Prolog
725  //
726  // PreHeader PreHeader
727  // *NewPreHeader *PrologPreHeader
728  // Header *PrologExit
729  // ... *NewPreHeader
730  // Latch Header
731  // *NewExit ...
732  // *EpilogPreHeader Latch
733  // LatchExit LatchExit
734 
735  // Calculate conditions for branch around loop for unrolling
736  // in epilog case and around prolog remainder loop in prolog case.
737  // Compute the number of extra iterations required, which is:
738  // extra iterations = run-time trip count % loop unroll factor
739  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
740  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
741  PreHeaderBR);
742  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
743  PreHeaderBR);
744  IRBuilder<> B(PreHeaderBR);
745  Value *ModVal;
746  // Calculate ModVal = (BECount + 1) % Count.
747  // Note that TripCount is BECount + 1.
748  if (isPowerOf2_32(Count)) {
749  // When Count is power of 2 we don't BECount for epilog case, however we'll
750  // need it for a branch around unrolling loop for prolog case.
751  ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
752  // 1. There are no iterations to be run in the prolog/epilog loop.
753  // OR
754  // 2. The addition computing TripCount overflowed.
755  //
756  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
757  // the number of iterations that remain to be run in the original loop is a
758  // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
759  // explicitly check this above).
760  } else {
761  // As (BECount + 1) can potentially unsigned overflow we count
762  // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
763  Value *ModValTmp = B.CreateURem(BECount,
764  ConstantInt::get(BECount->getType(),
765  Count));
766  Value *ModValAdd = B.CreateAdd(ModValTmp,
767  ConstantInt::get(ModValTmp->getType(), 1));
768  // At that point (BECount % Count) + 1 could be equal to Count.
769  // To handle this case we need to take mod by Count one more time.
770  ModVal = B.CreateURem(ModValAdd,
771  ConstantInt::get(BECount->getType(), Count),
772  "xtraiter");
773  }
774  Value *BranchVal =
775  UseEpilogRemainder ? B.CreateICmpULT(BECount,
776  ConstantInt::get(BECount->getType(),
777  Count - 1)) :
778  B.CreateIsNotNull(ModVal, "lcmp.mod");
779  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
780  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
781  // Branch to either remainder (extra iterations) loop or unrolling loop.
782  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
783  PreHeaderBR->eraseFromParent();
784  if (DT) {
785  if (UseEpilogRemainder)
786  DT->changeImmediateDominator(NewExit, PreHeader);
787  else
788  DT->changeImmediateDominator(PrologExit, PreHeader);
789  }
790  Function *F = Header->getParent();
791  // Get an ordered list of blocks in the loop to help with the ordering of the
792  // cloned blocks in the prolog/epilog code
793  LoopBlocksDFS LoopBlocks(L);
794  LoopBlocks.perform(LI);
795 
796  //
797  // For each extra loop iteration, create a copy of the loop's basic blocks
798  // and generate a condition that branches to the copy depending on the
799  // number of 'left over' iterations.
800  //
801  std::vector<BasicBlock *> NewBlocks;
802  ValueToValueMapTy VMap;
803 
804  // For unroll factor 2 remainder loop will have 1 iterations.
805  // Do not create 1 iteration loop.
806  bool CreateRemainderLoop = (Count != 2);
807 
808  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
809  // the loop, otherwise we create a cloned loop to execute the extra
810  // iterations. This function adds the appropriate CFG connections.
811  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
812  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
813  Loop *remainderLoop = CloneLoopBlocks(
814  L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
815  InsertTop, InsertBot,
816  NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
817 
818  // Assign the maximum possible trip count as the back edge weight for the
819  // remainder loop if the original loop comes with a branch weight.
820  if (remainderLoop && !UnrollRemainder)
821  updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
822 
823  // Insert the cloned blocks into the function.
824  F->getBasicBlockList().splice(InsertBot->getIterator(),
825  F->getBasicBlockList(),
826  NewBlocks[0]->getIterator(),
827  F->end());
828 
829  // Now the loop blocks are cloned and the other exiting blocks from the
830  // remainder are connected to the original Loop's exit blocks. The remaining
831  // work is to update the phi nodes in the original loop, and take in the
832  // values from the cloned region.
833  for (auto *BB : OtherExits) {
834  for (auto &II : *BB) {
835 
836  // Given we preserve LCSSA form, we know that the values used outside the
837  // loop will be used through these phi nodes at the exit blocks that are
838  // transformed below.
839  if (!isa<PHINode>(II))
840  break;
841  PHINode *Phi = cast<PHINode>(&II);
842  unsigned oldNumOperands = Phi->getNumIncomingValues();
843  // Add the incoming values from the remainder code to the end of the phi
844  // node.
845  for (unsigned i =0; i < oldNumOperands; i++){
846  Value *newVal = VMap.lookup(Phi->getIncomingValue(i));
847  // newVal can be a constant or derived from values outside the loop, and
848  // hence need not have a VMap value. Also, since lookup already generated
849  // a default "null" VMap entry for this value, we need to populate that
850  // VMap entry correctly, with the mapped entry being itself.
851  if (!newVal) {
852  newVal = Phi->getIncomingValue(i);
853  VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i);
854  }
855  Phi->addIncoming(newVal,
856  cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
857  }
858  }
859 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
860  for (BasicBlock *SuccBB : successors(BB)) {
861  assert(!(any_of(OtherExits,
862  [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
863  SuccBB == LatchExit) &&
864  "Breaks the definition of dedicated exits!");
865  }
866 #endif
867  }
868 
869  // Update the immediate dominator of the exit blocks and blocks that are
870  // reachable from the exit blocks. This is needed because we now have paths
871  // from both the original loop and the remainder code reaching the exit
872  // blocks. While the IDom of these exit blocks were from the original loop,
873  // now the IDom is the preheader (which decides whether the original loop or
874  // remainder code should run).
875  if (DT && !L->getExitingBlock()) {
876  SmallVector<BasicBlock *, 16> ChildrenToUpdate;
877  // NB! We have to examine the dom children of all loop blocks, not just
878  // those which are the IDom of the exit blocks. This is because blocks
879  // reachable from the exit blocks can have their IDom as the nearest common
880  // dominator of the exit blocks.
881  for (auto *BB : L->blocks()) {
882  auto *DomNodeBB = DT->getNode(BB);
883  for (auto *DomChild : DomNodeBB->children()) {
884  auto *DomChildBB = DomChild->getBlock();
885  if (!L->contains(LI->getLoopFor(DomChildBB)))
886  ChildrenToUpdate.push_back(DomChildBB);
887  }
888  }
889  for (auto *BB : ChildrenToUpdate)
890  DT->changeImmediateDominator(BB, PreHeader);
891  }
892 
893  // Loop structure should be the following:
894  // Epilog Prolog
895  //
896  // PreHeader PreHeader
897  // NewPreHeader PrologPreHeader
898  // Header PrologHeader
899  // ... ...
900  // Latch PrologLatch
901  // NewExit PrologExit
902  // EpilogPreHeader NewPreHeader
903  // EpilogHeader Header
904  // ... ...
905  // EpilogLatch Latch
906  // LatchExit LatchExit
907 
908  // Rewrite the cloned instruction operands to use the values created when the
909  // clone is created.
910  for (BasicBlock *BB : NewBlocks) {
911  for (Instruction &I : *BB) {
912  RemapInstruction(&I, VMap,
914  }
915  }
916 
917  if (UseEpilogRemainder) {
918  // Connect the epilog code to the original loop and update the
919  // PHI functions.
920  ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
921  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
922  PreserveLCSSA);
923 
924  // Update counter in loop for unrolling.
925  // I should be multiply of Count.
926  IRBuilder<> B2(NewPreHeader->getTerminator());
927  Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
928  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
929  B2.SetInsertPoint(LatchBR);
930  PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
931  Header->getFirstNonPHI());
932  Value *IdxSub =
933  B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
934  NewIdx->getName() + ".nsub");
935  Value *IdxCmp;
936  if (LatchBR->getSuccessor(0) == Header)
937  IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
938  else
939  IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
940  NewIdx->addIncoming(TestVal, NewPreHeader);
941  NewIdx->addIncoming(IdxSub, Latch);
942  LatchBR->setCondition(IdxCmp);
943  } else {
944  // Connect the prolog code to the original loop and update the
945  // PHI functions.
946  ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
947  NewPreHeader, VMap, DT, LI, PreserveLCSSA);
948  }
949 
950  // If this loop is nested, then the loop unroller changes the code in the any
951  // of its parent loops, so the Scalar Evolution pass needs to be run again.
952  SE->forgetTopmostLoop(L);
953 
954  // Verify that the Dom Tree is correct.
955 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
956  if (DT)
958 #endif
959 
960  // Canonicalize to LoopSimplifyForm both original and remainder loops. We
961  // cannot rely on the LoopUnrollPass to do this because it only does
962  // canonicalization for parent/subloops and not the sibling loops.
963  if (OtherExits.size() > 0) {
964  // Generate dedicated exit blocks for the original loop, to preserve
965  // LoopSimplifyForm.
966  formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
967  // Generate dedicated exit blocks for the remainder loop if one exists, to
968  // preserve LoopSimplifyForm.
969  if (remainderLoop)
970  formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
971  }
972 
973  auto UnrollResult = LoopUnrollResult::Unmodified;
974  if (remainderLoop && UnrollRemainder) {
975  LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
976  UnrollResult =
977  UnrollLoop(remainderLoop,
978  {/*Count*/ Count - 1, /*TripCount*/ Count - 1,
979  /*Force*/ false, /*AllowRuntime*/ false,
980  /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
981  /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
982  /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
983  LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
984  }
985 
986  if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
987  *ResultLoop = remainderLoop;
988  NumRuntimeUnrolled++;
989  return true;
990 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:80
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
LLVM_NODISCARD std::enable_if_t< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > dyn_cast(const Y &Val)
Definition: Casting.h:334
TargetTransformInfo TTI
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:208
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
iterator end()
Definition: Function.h:707
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:159
A cache of @llvm.assume calls within a function.
static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder)
Returns true if we can safely unroll a multi-exit/exiting loop.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:870
F(f)
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:150
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1320
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:289
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the edge connecting specified block.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:146
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:397
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:947
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector< BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI)
Create a clone of the blocks in a loop and connect them together.
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional &#39;br Cond, TrueDest, FalseDest&#39; instruction.
Definition: IRBuilder.h:958
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:198
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:342
RPOIterator endRPO() const
Definition: LoopIterator.h:140
BlockT * getHeader() const
Definition: LoopInfo.h:104
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:515
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:44
const SCEV * getExitCount(const Loop *L, BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:165
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1109
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:72
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:67
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling prolog code to the original loop.
The loop was fully unrolled into straight-line code.
NodeT * getBlock() const
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:434
static cl::opt< bool > UnrollRuntimeMultiExit("unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated"))
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:214
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:360
void dump() const
Definition: LoopInfo.cpp:649
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:258
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:492
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
Conditional or Unconditional Branch instruction.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
DomTreeNodeBase * getIDom() const
Value * getIncomingValueForBlock(const BasicBlock *BB) const
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop&#39;s loop id metadata.
Definition: LoopInfo.cpp:527
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
static bool canProfitablyUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock *> &OtherExits, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder)
Returns true if we can profitably unroll the multi-exit loop L.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:329
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1498
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1170
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2180
self_iterator getIterator()
Definition: ilist_node.h:81
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return an i1 value testing if Arg is not null.
Definition: IRBuilder.h:2464
assume Assume Builder
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1665
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling epilog code to the original loop.
void getUniqueNonLatchExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop except successors from Latch block are not considered...
Definition: LoopInfoImpl.h:121
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:41
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
Iterator for intrusive lists based on ilist_node.
static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, Loop *RemainderLoop, uint64_t UnrollFactor)
Type * getType() const
Return the LLVM type of this SCEV expression.
void setIncomingBlock(unsigned i, BasicBlock *BB)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1249
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:883
Module.h This file contains the declarations for the Module class.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:786
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:125
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:318
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:597
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Definition: LoopUnroll.cpp:135
Store the result of a depth first search within basic blocks contained by a single loop...
Definition: LoopIterator.h:97
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM...
Definition: ValueMapper.h:251
This class uses information about analyze scalars to rewrite expressions in canonical form...
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:282
If this flag is set, the remapper ignores missing function-local entries (Argument, Instruction, BasicBlock) that are not in the value map.
Definition: ValueMapper.h:90
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
bool hasValue() const
Definition: Optional.h:259
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:466
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:491
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:363
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:516
cl::opt< unsigned > SCEVCheapExpansionBudget
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:270
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:59
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:682
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:345
bool isUnconditional() const
void setCondition(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:74
succ_range successors(Instruction *I)
Definition: CFG.h:260
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
The loop was not modified.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
bool isHighCostExpansion(const SCEV *Expr, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can&#39;t be evaluated at runtime within given Budget.
void setIncomingValue(unsigned i, Value *V)
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1314
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:168
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:48
bool erase(const KeyT &Val)
Definition: ValueMap.h:191
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
void forgetTopmostLoop(const Loop *L)