LLVM  10.0.0svn
LoopUnrollPeel.cpp
Go to the documentation of this file.
1 //===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements some loop unrolling utilities for peeling loops
10 // with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for
11 // unrolling loops with compile-time constant trip counts.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/Optional.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/IR/BasicBlock.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/InstrTypes.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include "llvm/IR/Metadata.h"
33 #include "llvm/IR/PatternMatch.h"
34 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/Debug.h"
44 #include <algorithm>
45 #include <cassert>
46 #include <cstdint>
47 #include <limits>
48 
49 using namespace llvm;
50 using namespace llvm::PatternMatch;
51 
52 #define DEBUG_TYPE "loop-unroll"
53 
54 STATISTIC(NumPeeled, "Number of loops peeled");
55 
57  "unroll-peel-max-count", cl::init(7), cl::Hidden,
58  cl::desc("Max average trip count which will cause loop peeling."));
59 
61  "unroll-force-peel-count", cl::init(0), cl::Hidden,
62  cl::desc("Force a peel count regardless of profiling information."));
63 
65  "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden,
66  cl::desc("Allow peeling of loops with multiple deopt exits."));
67 
68 static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
69 
70 // Designates that a Phi is estimated to become invariant after an "infinite"
71 // number of loop iterations (i.e. only may become an invariant if the loop is
72 // fully unrolled).
73 static const unsigned InfiniteIterationsToInvariance =
75 
76 // Check whether we are capable of peeling this loop.
77 bool llvm::canPeel(Loop *L) {
78  // Make sure the loop is in simplified form
79  if (!L->isLoopSimplifyForm())
80  return false;
81 
85 
86  if (!Exits.empty()) {
87  // Latch's terminator is a conditional branch, Latch is exiting and
88  // all non Latch exits ends up with deoptimize.
89  const BasicBlock *Latch = L->getLoopLatch();
90  const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
91  return T && T->isConditional() && L->isLoopExiting(Latch) &&
92  all_of(Exits, [](const BasicBlock *BB) {
93  return BB->getTerminatingDeoptimizeCall();
94  });
95  }
96  }
97 
98  // Only peel loops that contain a single exit
99  if (!L->getExitingBlock() || !L->getUniqueExitBlock())
100  return false;
101 
102  // Don't try to peel loops where the latch is not the exiting block.
103  // This can be an indication of two different things:
104  // 1) The loop is not rotated.
105  // 2) The loop contains irreducible control flow that involves the latch.
106  if (L->getLoopLatch() != L->getExitingBlock())
107  return false;
108 
109  return true;
110 }
111 
112 // This function calculates the number of iterations after which the given Phi
113 // becomes an invariant. The pre-calculated values are memorized in the map. The
114 // function (shortcut is I) is calculated according to the following definition:
115 // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
116 // If %y is a loop invariant, then I(%x) = 1.
117 // If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
118 // Otherwise, I(%x) is infinite.
119 // TODO: Actually if %y is an expression that depends only on Phi %z and some
120 // loop invariants, we can estimate I(%x) = I(%z) + 1. The example
121 // looks like:
122 // %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
123 // %y = phi(0, 5),
124 // %a = %y + 1.
126  PHINode *Phi, Loop *L, BasicBlock *BackEdge,
127  SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
128  assert(Phi->getParent() == L->getHeader() &&
129  "Non-loop Phi should not be checked for turning into invariant.");
130  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
131  // If we already know the answer, take it from the map.
132  auto I = IterationsToInvariance.find(Phi);
133  if (I != IterationsToInvariance.end())
134  return I->second;
135 
136  // Otherwise we need to analyze the input from the back edge.
137  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
138  // Place infinity to map to avoid infinite recursion for cycled Phis. Such
139  // cycles can never stop on an invariant.
140  IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
141  unsigned ToInvariance = InfiniteIterationsToInvariance;
142 
143  if (L->isLoopInvariant(Input))
144  ToInvariance = 1u;
145  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
146  // Only consider Phis in header block.
147  if (IncPhi->getParent() != L->getHeader())
148  return InfiniteIterationsToInvariance;
149  // If the input becomes an invariant after X iterations, then our Phi
150  // becomes an invariant after X + 1 iterations.
151  unsigned InputToInvariance = calculateIterationsToInvariance(
152  IncPhi, L, BackEdge, IterationsToInvariance);
153  if (InputToInvariance != InfiniteIterationsToInvariance)
154  ToInvariance = InputToInvariance + 1u;
155  }
156 
157  // If we found that this Phi lies in an invariant chain, update the map.
158  if (ToInvariance != InfiniteIterationsToInvariance)
159  IterationsToInvariance[Phi] = ToInvariance;
160  return ToInvariance;
161 }
162 
163 // Return the number of iterations to peel off that make conditions in the
164 // body true/false. For example, if we peel 2 iterations off the loop below,
165 // the condition i < 2 can be evaluated at compile time.
166 // for (i = 0; i < n; i++)
167 // if (i < 2)
168 // ..
169 // else
170 // ..
171 // }
172 static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
173  ScalarEvolution &SE) {
174  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
175  unsigned DesiredPeelCount = 0;
176 
177  for (auto *BB : L.blocks()) {
178  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
179  if (!BI || BI->isUnconditional())
180  continue;
181 
182  // Ignore loop exit condition.
183  if (L.getLoopLatch() == BB)
184  continue;
185 
186  Value *Condition = BI->getCondition();
187  Value *LeftVal, *RightVal;
188  CmpInst::Predicate Pred;
189  if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
190  continue;
191 
192  const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
193  const SCEV *RightSCEV = SE.getSCEV(RightVal);
194 
195  // Do not consider predicates that are known to be true or false
196  // independently of the loop iteration.
197  if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) ||
199  RightSCEV))
200  continue;
201 
202  // Check if we have a condition with one AddRec and one non AddRec
203  // expression. Normalize LeftSCEV to be the AddRec.
204  if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
205  if (isa<SCEVAddRecExpr>(RightSCEV)) {
206  std::swap(LeftSCEV, RightSCEV);
207  Pred = ICmpInst::getSwappedPredicate(Pred);
208  } else
209  continue;
210  }
211 
212  const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
213 
214  // Avoid huge SCEV computations in the loop below, make sure we only
215  // consider AddRecs of the loop we are trying to peel and avoid
216  // non-monotonic predicates, as we will not be able to simplify the loop
217  // body.
218  // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can
219  // simplify the loop, if we peel 1 additional iteration, if there
220  // is no wrapping.
221  bool Increasing;
222  if (!LeftAR->isAffine() || LeftAR->getLoop() != &L ||
223  !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
224  continue;
225  (void)Increasing;
226 
227  // Check if extending the current DesiredPeelCount lets us evaluate Pred
228  // or !Pred in the loop body statically.
229  unsigned NewPeelCount = DesiredPeelCount;
230 
231  const SCEV *IterVal = LeftAR->evaluateAtIteration(
232  SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
233 
234  // If the original condition is not known, get the negated predicate
235  // (which holds on the else branch) and check if it is known. This allows
236  // us to peel of iterations that make the original condition false.
237  if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
238  Pred = ICmpInst::getInversePredicate(Pred);
239 
240  const SCEV *Step = LeftAR->getStepRecurrence(SE);
241  while (NewPeelCount < MaxPeelCount &&
242  SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
243  IterVal = SE.getAddExpr(IterVal, Step);
244  NewPeelCount++;
245  }
246 
247  // Only peel the loop if the monotonic predicate !Pred becomes known in the
248  // first iteration of the loop body after peeling.
249  if (NewPeelCount > DesiredPeelCount &&
251  RightSCEV))
252  DesiredPeelCount = NewPeelCount;
253  }
254 
255  return DesiredPeelCount;
256 }
257 
258 // Return the number of iterations we want to peel off.
259 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
261  unsigned &TripCount, ScalarEvolution &SE) {
262  assert(LoopSize > 0 && "Zero loop size is not allowed!");
263  // Save the UP.PeelCount value set by the target in
264  // TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
265  unsigned TargetPeelCount = UP.PeelCount;
266  UP.PeelCount = 0;
267  if (!canPeel(L))
268  return;
269 
270  // Only try to peel innermost loops.
271  if (!L->empty())
272  return;
273 
274  // If the user provided a peel count, use that.
275  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
276  if (UserPeelCount) {
277  LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
278  << " iterations.\n");
280  UP.PeelProfiledIterations = true;
281  return;
282  }
283 
284  // Skip peeling if it's disabled.
285  if (!UP.AllowPeeling)
286  return;
287 
288  unsigned AlreadyPeeled = 0;
289  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
290  AlreadyPeeled = *Peeled;
291  // Stop if we already peeled off the maximum number of iterations.
292  if (AlreadyPeeled >= UnrollPeelMaxCount)
293  return;
294 
295  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
296  // iterations of the loop. For this we compute the number for iterations after
297  // which every Phi is guaranteed to become an invariant, and try to peel the
298  // maximum number of iterations among these values, thus turning all those
299  // Phis into invariants.
300  // First, check that we can peel at least one iteration.
301  if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
302  // Store the pre-calculated values here.
303  SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
304  // Now go through all Phis to calculate their the number of iterations they
305  // need to become invariants.
306  // Start the max computation with the UP.PeelCount value set by the target
307  // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
308  unsigned DesiredPeelCount = TargetPeelCount;
309  BasicBlock *BackEdge = L->getLoopLatch();
310  assert(BackEdge && "Loop is not in simplified form?");
311  for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
312  PHINode *Phi = cast<PHINode>(&*BI);
313  unsigned ToInvariance = calculateIterationsToInvariance(
314  Phi, L, BackEdge, IterationsToInvariance);
315  if (ToInvariance != InfiniteIterationsToInvariance)
316  DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
317  }
318 
319  // Pay respect to limitations implied by loop size and the max peel count.
320  unsigned MaxPeelCount = UnrollPeelMaxCount;
321  MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
322 
323  DesiredPeelCount = std::max(DesiredPeelCount,
324  countToEliminateCompares(*L, MaxPeelCount, SE));
325 
326  if (DesiredPeelCount > 0) {
327  DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
328  // Consider max peel count limitation.
329  assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
330  if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
331  LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
332  << " iteration(s) to turn"
333  << " some Phis into invariants.\n");
334  UP.PeelCount = DesiredPeelCount;
335  UP.PeelProfiledIterations = false;
336  return;
337  }
338  }
339  }
340 
341  // Bail if we know the statically calculated trip count.
342  // In this case we rather prefer partial unrolling.
343  if (TripCount)
344  return;
345 
346  // Do not apply profile base peeling if it is disabled.
347  if (!UP.PeelProfiledIterations)
348  return;
349  // If we don't know the trip count, but have reason to believe the average
350  // trip count is low, peeling should be beneficial, since we will usually
351  // hit the peeled section.
352  // We only do this in the presence of profile information, since otherwise
353  // our estimates of the trip count are not reliable enough.
354  if (L->getHeader()->getParent()->hasProfileData()) {
356  if (!PeelCount)
357  return;
358 
359  LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
360  << "\n");
361 
362  if (*PeelCount) {
363  if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
364  (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
365  LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
366  << " iterations.\n");
367  UP.PeelCount = *PeelCount;
368  return;
369  }
370  LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
371  LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
372  LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
373  LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
374  << "\n");
375  LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
376  }
377  }
378 }
379 
380 /// Update the branch weights of the latch of a peeled-off loop
381 /// iteration.
382 /// This sets the branch weights for the latch of the recently peeled off loop
383 /// iteration correctly.
384 /// Let F is a weight of the edge from latch to header.
385 /// Let E is a weight of the edge from latch to exit.
386 /// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
387 /// go to exit.
388 /// Then, Estimated TripCount = F / E.
389 /// For I-th (counting from 0) peeled off iteration we set the the weights for
390 /// the peeled latch as (TC - I, 1). It gives us reasonable distribution,
391 /// The probability to go to exit 1/(TC-I) increases. At the same time
392 /// the estimated trip count of remaining loop reduces by I.
393 /// To avoid dealing with division rounding we can just multiple both part
394 /// of weights to E and use weight as (F - I * E, E).
395 ///
396 /// \param Header The copy of the header block that belongs to next iteration.
397 /// \param LatchBR The copy of the latch branch that belongs to this iteration.
398 /// \param[in,out] FallThroughWeight The weight of the edge from latch to
399 /// header before peeling (in) and after peeled off one iteration (out).
400 static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
401  uint64_t ExitWeight,
402  uint64_t &FallThroughWeight) {
403  // FallThroughWeight is 0 means that there is no branch weights on original
404  // latch block or estimated trip count is zero.
405  if (!FallThroughWeight)
406  return;
407 
408  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
409  MDBuilder MDB(LatchBR->getContext());
410  MDNode *WeightNode =
411  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
412  : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
413  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
414  FallThroughWeight =
415  FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
416 }
417 
418 /// Initialize the weights.
419 ///
420 /// \param Header The header block.
421 /// \param LatchBR The latch branch.
422 /// \param[out] ExitWeight The weight of the edge from Latch to Exit.
423 /// \param[out] FallThroughWeight The weight of the edge from Latch to Header.
424 static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
425  uint64_t &ExitWeight,
426  uint64_t &FallThroughWeight) {
427  uint64_t TrueWeight, FalseWeight;
428  if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
429  return;
430  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
431  ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
432  FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;
433 }
434 
435 /// Update the weights of original Latch block after peeling off all iterations.
436 ///
437 /// \param Header The header block.
438 /// \param LatchBR The latch branch.
439 /// \param ExitWeight The weight of the edge from Latch to Exit.
440 /// \param FallThroughWeight The weight of the edge from Latch to Header.
441 static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
442  uint64_t ExitWeight,
443  uint64_t FallThroughWeight) {
444  // FallThroughWeight is 0 means that there is no branch weights on original
445  // latch block or estimated trip count is zero.
446  if (!FallThroughWeight)
447  return;
448 
449  // Sets the branch weights on the loop exit.
450  MDBuilder MDB(LatchBR->getContext());
451  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
452  MDNode *WeightNode =
453  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
454  : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
455  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
456 }
457 
458 /// Clones the body of the loop L, putting it between \p InsertTop and \p
459 /// InsertBot.
460 /// \param IterNumber The serial number of the iteration currently being
461 /// peeled off.
462 /// \param ExitEdges The exit edges of the original loop.
463 /// \param[out] NewBlocks A list of the blocks in the newly created clone
464 /// \param[out] VMap The value map between the loop and the new clone.
465 /// \param LoopBlocks A helper for DFS-traversal of the loop.
466 /// \param LVMap A value-map that maps instructions from the original loop to
467 /// instructions in the last peeled-off iteration.
468 static void cloneLoopBlocks(
469  Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
470  SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges,
471  SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
473  LoopInfo *LI) {
474  BasicBlock *Header = L->getHeader();
475  BasicBlock *Latch = L->getLoopLatch();
476  BasicBlock *PreHeader = L->getLoopPreheader();
477 
478  Function *F = Header->getParent();
479  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
480  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
481  Loop *ParentLoop = L->getParentLoop();
482 
483  // For each block in the original loop, create a new copy,
484  // and update the value map with the newly created values.
485  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
486  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
487  NewBlocks.push_back(NewBB);
488 
489  if (ParentLoop)
490  ParentLoop->addBasicBlockToLoop(NewBB, *LI);
491 
492  VMap[*BB] = NewBB;
493 
494  // If dominator tree is available, insert nodes to represent cloned blocks.
495  if (DT) {
496  if (Header == *BB)
497  DT->addNewBlock(NewBB, InsertTop);
498  else {
499  DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
500  // VMap must contain entry for IDom, as the iteration order is RPO.
501  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
502  }
503  }
504  }
505 
506  // Hook-up the control flow for the newly inserted blocks.
507  // The new header is hooked up directly to the "top", which is either
508  // the original loop preheader (for the first iteration) or the previous
509  // iteration's exiting block (for every other iteration)
510  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
511 
512  // Similarly, for the latch:
513  // The original exiting edge is still hooked up to the loop exit.
514  // The backedge now goes to the "bottom", which is either the loop's real
515  // header (for the last peeled iteration) or the copied header of the next
516  // iteration (for every other iteration)
517  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
518  BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
519  for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
520  if (LatchBR->getSuccessor(idx) == Header) {
521  LatchBR->setSuccessor(idx, InsertBot);
522  break;
523  }
524  if (DT)
525  DT->changeImmediateDominator(InsertBot, NewLatch);
526 
527  // The new copy of the loop body starts with a bunch of PHI nodes
528  // that pick an incoming value from either the preheader, or the previous
529  // loop iteration. Since this copy is no longer part of the loop, we
530  // resolve this statically:
531  // For the first iteration, we use the value from the preheader directly.
532  // For any other iteration, we replace the phi with the value generated by
533  // the immediately preceding clone of the loop body (which represents
534  // the previous iteration).
535  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
536  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
537  if (IterNumber == 0) {
538  VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
539  } else {
540  Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
541  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
542  if (LatchInst && L->contains(LatchInst))
543  VMap[&*I] = LVMap[LatchInst];
544  else
545  VMap[&*I] = LatchVal;
546  }
547  cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
548  }
549 
550  // Fix up the outgoing values - we need to add a value for the iteration
551  // we've just created. Note that this must happen *after* the incoming
552  // values are adjusted, since the value going out of the latch may also be
553  // a value coming into the header.
554  for (auto Edge : ExitEdges)
555  for (PHINode &PHI : Edge.second->phis()) {
556  Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
557  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
558  if (LatchInst && L->contains(LatchInst))
559  LatchVal = VMap[LatchVal];
560  PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
561  }
562 
563  // LastValueMap is updated with the values for the current loop
564  // which are used the next time this function is called.
565  for (const auto &KV : VMap)
566  LVMap[KV.first] = KV.second;
567 }
568 
569 /// Peel off the first \p PeelCount iterations of loop \p L.
570 ///
571 /// Note that this does not peel them off as a single straight-line block.
572 /// Rather, each iteration is peeled off separately, and needs to check the
573 /// exit condition.
574 /// For loops that dynamically execute \p PeelCount iterations or less
575 /// this provides a benefit, since the peeled off iterations, which account
576 /// for the bulk of dynamic execution, can be further simplified by scalar
577 /// optimizations.
578 bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
580  AssumptionCache *AC, bool PreserveLCSSA) {
581  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
582  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
583 
584  LoopBlocksDFS LoopBlocks(L);
585  LoopBlocks.perform(LI);
586 
587  BasicBlock *Header = L->getHeader();
588  BasicBlock *PreHeader = L->getLoopPreheader();
589  BasicBlock *Latch = L->getLoopLatch();
591  L->getExitEdges(ExitEdges);
592 
594  if (DT) {
595  // We'd like to determine the idom of exit block after peeling one
596  // iteration.
597  // Let Exit is exit block.
598  // Let ExitingSet - is a set of predecessors of Exit block. They are exiting
599  // blocks.
600  // Let Latch' and ExitingSet' are copies after a peeling.
601  // We'd like to find an idom'(Exit) - idom of Exit after peeling.
602  // It is an evident that idom'(Exit) will be the nearest common dominator
603  // of ExitingSet and ExitingSet'.
604  // idom(Exit) is a nearest common dominator of ExitingSet.
605  // idom(Exit)' is a nearest common dominator of ExitingSet'.
606  // Taking into account that we have a single Latch, Latch' will dominate
607  // Header and idom(Exit).
608  // So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'.
609  // All these basic blocks are in the same loop, so what we find is
610  // (nearest common dominator of idom(Exit) and Latch)'.
611  // In the loop below we remember nearest common dominator of idom(Exit) and
612  // Latch to update idom of Exit later.
613  assert(L->hasDedicatedExits() && "No dedicated exits?");
614  for (auto Edge : ExitEdges) {
615  if (ExitIDom.count(Edge.second))
616  continue;
618  DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
619  assert(L->contains(BB) && "IDom is not in a loop");
620  ExitIDom[Edge.second] = BB;
621  }
622  }
623 
624  Function *F = Header->getParent();
625 
626  // Set up all the necessary basic blocks. It is convenient to split the
627  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
628  // body, and a new preheader for the "real" loop.
629 
630  // Peeling the first iteration transforms.
631  //
632  // PreHeader:
633  // ...
634  // Header:
635  // LoopBody
636  // If (cond) goto Header
637  // Exit:
638  //
639  // into
640  //
641  // InsertTop:
642  // LoopBody
643  // If (!cond) goto Exit
644  // InsertBot:
645  // NewPreHeader:
646  // ...
647  // Header:
648  // LoopBody
649  // If (cond) goto Header
650  // Exit:
651  //
652  // Each following iteration will split the current bottom anchor in two,
653  // and put the new copy of the loop body between these two blocks. That is,
654  // after peeling another iteration from the example above, we'll split
655  // InsertBot, and get:
656  //
657  // InsertTop:
658  // LoopBody
659  // If (!cond) goto Exit
660  // InsertBot:
661  // LoopBody
662  // If (!cond) goto Exit
663  // InsertBot.next:
664  // NewPreHeader:
665  // ...
666  // Header:
667  // LoopBody
668  // If (cond) goto Header
669  // Exit:
670 
671  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
672  BasicBlock *InsertBot =
673  SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
674  BasicBlock *NewPreHeader =
675  SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
676 
677  InsertTop->setName(Header->getName() + ".peel.begin");
678  InsertBot->setName(Header->getName() + ".peel.next");
679  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
680 
681  ValueToValueMapTy LVMap;
682 
683  // If we have branch weight information, we'll want to update it for the
684  // newly created branches.
685  BranchInst *LatchBR =
686  cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
687  uint64_t ExitWeight = 0, FallThroughWeight = 0;
688  initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
689 
690  // For each peeled-off iteration, make a copy of the loop.
691  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
693  ValueToValueMapTy VMap;
694 
695  cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
696  LoopBlocks, VMap, LVMap, DT, LI);
697 
698  // Remap to use values from the current iteration instead of the
699  // previous one.
700  remapInstructionsInBlocks(NewBlocks, VMap);
701 
702  if (DT) {
703  // Latches of the cloned loops dominate over the loop exit, so idom of the
704  // latter is the first cloned loop body, as original PreHeader dominates
705  // the original loop body.
706  if (Iter == 0)
707  for (auto Exit : ExitIDom)
708  DT->changeImmediateDominator(Exit.first,
709  cast<BasicBlock>(LVMap[Exit.second]));
710 #ifdef EXPENSIVE_CHECKS
712 #endif
713  }
714 
715  auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
716  updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
717  // Remove Loop metadata from the latch branch instruction
718  // because it is not the Loop's latch branch anymore.
719  LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
720 
721  InsertTop = InsertBot;
722  InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
723  InsertBot->setName(Header->getName() + ".peel.next");
724 
725  F->getBasicBlockList().splice(InsertTop->getIterator(),
726  F->getBasicBlockList(),
727  NewBlocks[0]->getIterator(), F->end());
728  }
729 
730  // Now adjust the phi nodes in the loop header to get their initial values
731  // from the last peeled-off iteration instead of the preheader.
732  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
733  PHINode *PHI = cast<PHINode>(I);
734  Value *NewVal = PHI->getIncomingValueForBlock(Latch);
735  Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
736  if (LatchInst && L->contains(LatchInst))
737  NewVal = LVMap[LatchInst];
738 
739  PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
740  }
741 
742  fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
743 
744  if (Loop *ParentLoop = L->getParentLoop())
745  L = ParentLoop;
746 
747  // We modified the loop, update SE.
748  SE->forgetTopmostLoop(L);
749 
750  // Finally DomtTree must be correct.
752 
753  // FIXME: Incrementally update loop-simplify
754  simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
755 
756  NumPeeled++;
757 
758  // Update Metadata for count of peeled off iterations.
759  unsigned AlreadyPeeled = 0;
760  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
761  AlreadyPeeled = *Peeled;
762  addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
763 
764  return true;
765 }
bool PeelProfiledIterations
Allow peeling basing on profile.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock *> > &ExitEdges, SmallVectorImpl< BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:211
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static const char * PeeledCountMetaData
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator end()
Definition: Function.h:682
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:85
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
void push_back(const T &Elt)
Definition: SmallVector.h:211
The main scalar evolution driver.
static cl::opt< bool > UnrollPeelMultiDeoptExit("unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden, cl::desc("Allow peeling of loops with multiple deopt exits."))
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
A cache of @llvm.assume calls within a function.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:745
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1177
BasicBlock * getSuccessor(unsigned i) const
bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, bool &Increasing)
Return true if, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or...
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:863
F(f)
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the edge connecting specified block.
static unsigned calculateIterationsToInvariance(PHINode *Phi, Loop *L, BasicBlock *BackEdge, SmallDenseMap< PHINode *, unsigned > &IterationsToInvariance)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:174
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:831
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
bool AllowPeeling
Allow peeling off loop iterations.
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
RPOIterator endRPO() const
Definition: LoopIterator.h:140
BlockT * getHeader() const
Definition: LoopInfo.h:105
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:237
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1099
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:207
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t FallThroughWeight)
Update the weights of original Latch block after peeling off all iterations.
NodeT * getBlock() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
llvm::Optional< int > getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopUtils.cpp:284
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:208
Conditional or Unconditional Branch instruction.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
DomTreeNodeBase * getIDom() const
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE)
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:327
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
self_iterator getIterator()
Definition: ilist_node.h:81
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:675
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:308
static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t &ExitWeight, uint64_t &FallThroughWeight)
Initialize the weights.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:131
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
void getUniqueNonLatchExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop except successors from Latch block are not considered...
Definition: LoopInfoImpl.h:122
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1222
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void getExitEdges(SmallVectorImpl< Edge > &ExitEdges) const
Return all pairs of (inside_block,outside_block).
Definition: LoopInfoImpl.h:141
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:115
Iterator for intrusive lists based on ilist_node.
Type * getType() const
Return the LLVM type of this SCEV expression.
static const unsigned InfiniteIterationsToInvariance
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool isConditional() const
bool canPeel(Loop *L)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
Store the result of a depth first search within basic blocks contained by a single loop...
Definition: LoopIterator.h:97
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
LoopT * getParentLoop() const
Definition: LoopInfo.h:106
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:460
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
This class represents an analyzed expression in the program.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
Parameters that control the generic loop unrolling transformation.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:657
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t &FallThroughWeight)
Update the branch weights of the latch of a peeled-off loop iteration.
bool empty() const
Definition: LoopInfo.h:151
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:847
This pass exposes codegen information to IR-level passes.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1311
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:161
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
const BasicBlock * getParent() const
Definition: Instruction.h:66
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
void forgetTopmostLoop(const Loop *L)
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock *> &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.