LLVM  14.0.0git
LoopPeel.cpp
Go to the documentation of this file.
1 //===- LoopPeel.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Loop Peeling Utilities.
10 //===----------------------------------------------------------------------===//
11 
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/Dominators.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/InstrTypes.h"
26 #include "llvm/IR/Instruction.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/Debug.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <cstdint>
45 #include <limits>
46 
47 using namespace llvm;
48 using namespace llvm::PatternMatch;
49 
50 #define DEBUG_TYPE "loop-peel"
51 
52 STATISTIC(NumPeeled, "Number of loops peeled");
53 
55  "unroll-peel-count", cl::Hidden,
56  cl::desc("Set the unroll peeling count, for testing purposes"));
57 
58 static cl::opt<bool>
59  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
60  cl::desc("Allows loops to be peeled when the dynamic "
61  "trip count is known to be low."));
62 
63 static cl::opt<bool>
64  UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
65  cl::init(false), cl::Hidden,
66  cl::desc("Allows loop nests to be peeled."));
67 
69  "unroll-peel-max-count", cl::init(7), cl::Hidden,
70  cl::desc("Max average trip count which will cause loop peeling."));
71 
73  "unroll-force-peel-count", cl::init(0), cl::Hidden,
74  cl::desc("Force a peel count regardless of profiling information."));
75 
76 static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
77 
78 // Designates that a Phi is estimated to become invariant after an "infinite"
79 // number of loop iterations (i.e. only may become an invariant if the loop is
80 // fully unrolled).
81 static const unsigned InfiniteIterationsToInvariance =
83 
84 // Check whether we are capable of peeling this loop.
85 bool llvm::canPeel(Loop *L) {
86  // Make sure the loop is in simplified form
87  if (!L->isLoopSimplifyForm())
88  return false;
89 
90  // Don't try to peel loops where the latch is not the exiting block.
91  // This can be an indication of two different things:
92  // 1) The loop is not rotated.
93  // 2) The loop contains irreducible control flow that involves the latch.
94  const BasicBlock *Latch = L->getLoopLatch();
95  if (!L->isLoopExiting(Latch))
96  return false;
97 
98  // Peeling is only supported if the latch is a branch.
99  if (!isa<BranchInst>(Latch->getTerminator()))
100  return false;
101 
103  L->getUniqueNonLatchExitBlocks(Exits);
104  // The latch must either be the only exiting block or all non-latch exit
105  // blocks have either a deopt or unreachable terminator. Both deopt and
106  // unreachable terminators are a strong indication they are not taken. Note
107  // that this is a profitability check, not a legality check. Also note that
108  // LoopPeeling currently can only update the branch weights of latch blocks
109  // and branch weights to blocks with deopt or unreachable do not need
110  // updating.
111  return all_of(Exits, [](const BasicBlock *BB) {
112  return BB->getTerminatingDeoptimizeCall() ||
113  isa<UnreachableInst>(BB->getTerminator());
114  });
115 }
116 
117 // This function calculates the number of iterations after which the given Phi
118 // becomes an invariant. The pre-calculated values are memorized in the map. The
119 // function (shortcut is I) is calculated according to the following definition:
120 // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
121 // If %y is a loop invariant, then I(%x) = 1.
122 // If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
123 // Otherwise, I(%x) is infinite.
124 // TODO: Actually if %y is an expression that depends only on Phi %z and some
125 // loop invariants, we can estimate I(%x) = I(%z) + 1. The example
126 // looks like:
127 // %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
128 // %y = phi(0, 5),
129 // %a = %y + 1.
131  PHINode *Phi, Loop *L, BasicBlock *BackEdge,
132  SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
133  assert(Phi->getParent() == L->getHeader() &&
134  "Non-loop Phi should not be checked for turning into invariant.");
135  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
136  // If we already know the answer, take it from the map.
137  auto I = IterationsToInvariance.find(Phi);
138  if (I != IterationsToInvariance.end())
139  return I->second;
140 
141  // Otherwise we need to analyze the input from the back edge.
142  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
143  // Place infinity to map to avoid infinite recursion for cycled Phis. Such
144  // cycles can never stop on an invariant.
145  IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
146  unsigned ToInvariance = InfiniteIterationsToInvariance;
147 
148  if (L->isLoopInvariant(Input))
149  ToInvariance = 1u;
150  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
151  // Only consider Phis in header block.
152  if (IncPhi->getParent() != L->getHeader())
154  // If the input becomes an invariant after X iterations, then our Phi
155  // becomes an invariant after X + 1 iterations.
156  unsigned InputToInvariance = calculateIterationsToInvariance(
157  IncPhi, L, BackEdge, IterationsToInvariance);
158  if (InputToInvariance != InfiniteIterationsToInvariance)
159  ToInvariance = InputToInvariance + 1u;
160  }
161 
162  // If we found that this Phi lies in an invariant chain, update the map.
163  if (ToInvariance != InfiniteIterationsToInvariance)
164  IterationsToInvariance[Phi] = ToInvariance;
165  return ToInvariance;
166 }
167 
168 // Return the number of iterations to peel off that make conditions in the
169 // body true/false. For example, if we peel 2 iterations off the loop below,
170 // the condition i < 2 can be evaluated at compile time.
171 // for (i = 0; i < n; i++)
172 // if (i < 2)
173 // ..
174 // else
175 // ..
176 // }
177 static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
178  ScalarEvolution &SE) {
179  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
180  unsigned DesiredPeelCount = 0;
181 
182  for (auto *BB : L.blocks()) {
183  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
184  if (!BI || BI->isUnconditional())
185  continue;
186 
187  // Ignore loop exit condition.
188  if (L.getLoopLatch() == BB)
189  continue;
190 
191  Value *Condition = BI->getCondition();
192  Value *LeftVal, *RightVal;
193  CmpInst::Predicate Pred;
194  if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
195  continue;
196 
197  const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
198  const SCEV *RightSCEV = SE.getSCEV(RightVal);
199 
200  // Do not consider predicates that are known to be true or false
201  // independently of the loop iteration.
202  if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
203  continue;
204 
205  // Check if we have a condition with one AddRec and one non AddRec
206  // expression. Normalize LeftSCEV to be the AddRec.
207  if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
208  if (isa<SCEVAddRecExpr>(RightSCEV)) {
209  std::swap(LeftSCEV, RightSCEV);
210  Pred = ICmpInst::getSwappedPredicate(Pred);
211  } else
212  continue;
213  }
214 
215  const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
216 
217  // Avoid huge SCEV computations in the loop below, make sure we only
218  // consider AddRecs of the loop we are trying to peel.
219  if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
220  continue;
221  if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
222  !SE.getMonotonicPredicateType(LeftAR, Pred))
223  continue;
224 
225  // Check if extending the current DesiredPeelCount lets us evaluate Pred
226  // or !Pred in the loop body statically.
227  unsigned NewPeelCount = DesiredPeelCount;
228 
229  const SCEV *IterVal = LeftAR->evaluateAtIteration(
230  SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
231 
232  // If the original condition is not known, get the negated predicate
233  // (which holds on the else branch) and check if it is known. This allows
234  // us to peel of iterations that make the original condition false.
235  if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
236  Pred = ICmpInst::getInversePredicate(Pred);
237 
238  const SCEV *Step = LeftAR->getStepRecurrence(SE);
239  const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
240  auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
241  &NewPeelCount]() {
242  IterVal = NextIterVal;
243  NextIterVal = SE.getAddExpr(IterVal, Step);
244  NewPeelCount++;
245  };
246 
247  auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
248  return NewPeelCount < MaxPeelCount;
249  };
250 
251  while (CanPeelOneMoreIteration() &&
252  SE.isKnownPredicate(Pred, IterVal, RightSCEV))
253  PeelOneMoreIteration();
254 
255  // With *that* peel count, does the predicate !Pred become known in the
256  // first iteration of the loop body after peeling?
257  if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
258  RightSCEV))
259  continue; // If not, give up.
260 
261  // However, for equality comparisons, that isn't always sufficient to
262  // eliminate the comparsion in loop body, we may need to peel one more
263  // iteration. See if that makes !Pred become unknown again.
264  if (ICmpInst::isEquality(Pred) &&
265  !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
266  RightSCEV) &&
267  !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
268  SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
269  if (!CanPeelOneMoreIteration())
270  continue; // Need to peel one more iteration, but can't. Give up.
271  PeelOneMoreIteration(); // Great!
272  }
273 
274  DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
275  }
276 
277  return DesiredPeelCount;
278 }
279 
280 // Return the number of iterations we want to peel off.
281 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
283  unsigned &TripCount, ScalarEvolution &SE,
284  unsigned Threshold) {
285  assert(LoopSize > 0 && "Zero loop size is not allowed!");
286  // Save the PP.PeelCount value set by the target in
287  // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
288  unsigned TargetPeelCount = PP.PeelCount;
289  PP.PeelCount = 0;
290  if (!canPeel(L))
291  return;
292 
293  // Only try to peel innermost loops by default.
294  // The constraint can be relaxed by the target in TTI.getUnrollingPreferences
295  // or by the flag -unroll-allow-loop-nests-peeling.
296  if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
297  return;
298 
299  // If the user provided a peel count, use that.
300  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
301  if (UserPeelCount) {
302  LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
303  << " iterations.\n");
305  PP.PeelProfiledIterations = true;
306  return;
307  }
308 
309  // Skip peeling if it's disabled.
310  if (!PP.AllowPeeling)
311  return;
312 
313  unsigned AlreadyPeeled = 0;
314  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
315  AlreadyPeeled = *Peeled;
316  // Stop if we already peeled off the maximum number of iterations.
317  if (AlreadyPeeled >= UnrollPeelMaxCount)
318  return;
319 
320  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
321  // iterations of the loop. For this we compute the number for iterations after
322  // which every Phi is guaranteed to become an invariant, and try to peel the
323  // maximum number of iterations among these values, thus turning all those
324  // Phis into invariants.
325  // First, check that we can peel at least one iteration.
326  if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) {
327  // Store the pre-calculated values here.
328  SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
329  // Now go through all Phis to calculate their the number of iterations they
330  // need to become invariants.
331  // Start the max computation with the UP.PeelCount value set by the target
332  // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
333  unsigned DesiredPeelCount = TargetPeelCount;
334  BasicBlock *BackEdge = L->getLoopLatch();
335  assert(BackEdge && "Loop is not in simplified form?");
336  for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
337  PHINode *Phi = cast<PHINode>(&*BI);
338  unsigned ToInvariance = calculateIterationsToInvariance(
339  Phi, L, BackEdge, IterationsToInvariance);
340  if (ToInvariance != InfiniteIterationsToInvariance)
341  DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
342  }
343 
344  // Pay respect to limitations implied by loop size and the max peel count.
345  unsigned MaxPeelCount = UnrollPeelMaxCount;
346  MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
347 
348  DesiredPeelCount = std::max(DesiredPeelCount,
349  countToEliminateCompares(*L, MaxPeelCount, SE));
350 
351  if (DesiredPeelCount > 0) {
352  DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
353  // Consider max peel count limitation.
354  assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
355  if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
356  LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
357  << " iteration(s) to turn"
358  << " some Phis into invariants.\n");
359  PP.PeelCount = DesiredPeelCount;
360  PP.PeelProfiledIterations = false;
361  return;
362  }
363  }
364  }
365 
366  // Bail if we know the statically calculated trip count.
367  // In this case we rather prefer partial unrolling.
368  if (TripCount)
369  return;
370 
371  // Do not apply profile base peeling if it is disabled.
372  if (!PP.PeelProfiledIterations)
373  return;
374  // If we don't know the trip count, but have reason to believe the average
375  // trip count is low, peeling should be beneficial, since we will usually
376  // hit the peeled section.
377  // We only do this in the presence of profile information, since otherwise
378  // our estimates of the trip count are not reliable enough.
379  if (L->getHeader()->getParent()->hasProfileData()) {
381  if (!PeelCount)
382  return;
383 
384  LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
385  << "\n");
386 
387  if (*PeelCount) {
388  if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
389  (LoopSize * (*PeelCount + 1) <= Threshold)) {
390  LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
391  << " iterations.\n");
392  PP.PeelCount = *PeelCount;
393  return;
394  }
395  LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
396  LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
397  LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
398  LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
399  << "\n");
400  LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
401  }
402  }
403 }
404 
405 /// Update the branch weights of the latch of a peeled-off loop
406 /// iteration.
407 /// This sets the branch weights for the latch of the recently peeled off loop
408 /// iteration correctly.
409 /// Let F is a weight of the edge from latch to header.
410 /// Let E is a weight of the edge from latch to exit.
411 /// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
412 /// go to exit.
413 /// Then, Estimated TripCount = F / E.
414 /// For I-th (counting from 0) peeled off iteration we set the the weights for
415 /// the peeled latch as (TC - I, 1). It gives us reasonable distribution,
416 /// The probability to go to exit 1/(TC-I) increases. At the same time
417 /// the estimated trip count of remaining loop reduces by I.
418 /// To avoid dealing with division rounding we can just multiple both part
419 /// of weights to E and use weight as (F - I * E, E).
420 ///
421 /// \param Header The copy of the header block that belongs to next iteration.
422 /// \param LatchBR The copy of the latch branch that belongs to this iteration.
423 /// \param[in,out] FallThroughWeight The weight of the edge from latch to
424 /// header before peeling (in) and after peeled off one iteration (out).
425 static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
426  uint64_t ExitWeight,
427  uint64_t &FallThroughWeight) {
428  // FallThroughWeight is 0 means that there is no branch weights on original
429  // latch block or estimated trip count is zero.
430  if (!FallThroughWeight)
431  return;
432 
433  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
434  MDBuilder MDB(LatchBR->getContext());
435  MDNode *WeightNode =
436  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
437  : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
438  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
439  FallThroughWeight =
440  FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
441 }
442 
443 /// Initialize the weights.
444 ///
445 /// \param Header The header block.
446 /// \param LatchBR The latch branch.
447 /// \param[out] ExitWeight The weight of the edge from Latch to Exit.
448 /// \param[out] FallThroughWeight The weight of the edge from Latch to Header.
449 static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
450  uint64_t &ExitWeight,
451  uint64_t &FallThroughWeight) {
452  uint64_t TrueWeight, FalseWeight;
453  if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
454  return;
455  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
456  ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
457  FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;
458 }
459 
460 /// Update the weights of original Latch block after peeling off all iterations.
461 ///
462 /// \param Header The header block.
463 /// \param LatchBR The latch branch.
464 /// \param ExitWeight The weight of the edge from Latch to Exit.
465 /// \param FallThroughWeight The weight of the edge from Latch to Header.
466 static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
467  uint64_t ExitWeight,
468  uint64_t FallThroughWeight) {
469  // FallThroughWeight is 0 means that there is no branch weights on original
470  // latch block or estimated trip count is zero.
471  if (!FallThroughWeight)
472  return;
473 
474  // Sets the branch weights on the loop exit.
475  MDBuilder MDB(LatchBR->getContext());
476  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
477  MDNode *WeightNode =
478  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
479  : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
480  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
481 }
482 
483 /// Clones the body of the loop L, putting it between \p InsertTop and \p
484 /// InsertBot.
485 /// \param IterNumber The serial number of the iteration currently being
486 /// peeled off.
487 /// \param ExitEdges The exit edges of the original loop.
488 /// \param[out] NewBlocks A list of the blocks in the newly created clone
489 /// \param[out] VMap The value map between the loop and the new clone.
490 /// \param LoopBlocks A helper for DFS-traversal of the loop.
491 /// \param LVMap A value-map that maps instructions from the original loop to
492 /// instructions in the last peeled-off iteration.
493 static void cloneLoopBlocks(
494  Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
495  SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
496  SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
498  LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
499  BasicBlock *Header = L->getHeader();
500  BasicBlock *Latch = L->getLoopLatch();
501  BasicBlock *PreHeader = L->getLoopPreheader();
502 
503  Function *F = Header->getParent();
504  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
505  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
506  Loop *ParentLoop = L->getParentLoop();
507 
508  // For each block in the original loop, create a new copy,
509  // and update the value map with the newly created values.
510  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
511  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
512  NewBlocks.push_back(NewBB);
513 
514  // If an original block is an immediate child of the loop L, its copy
515  // is a child of a ParentLoop after peeling. If a block is a child of
516  // a nested loop, it is handled in the cloneLoop() call below.
517  if (ParentLoop && LI->getLoopFor(*BB) == L)
518  ParentLoop->addBasicBlockToLoop(NewBB, *LI);
519 
520  VMap[*BB] = NewBB;
521 
522  // If dominator tree is available, insert nodes to represent cloned blocks.
523  if (DT) {
524  if (Header == *BB)
525  DT->addNewBlock(NewBB, InsertTop);
526  else {
527  DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
528  // VMap must contain entry for IDom, as the iteration order is RPO.
529  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
530  }
531  }
532  }
533 
534  {
535  // Identify what other metadata depends on the cloned version. After
536  // cloning, replace the metadata with the corrected version for both
537  // memory instructions and noalias intrinsics.
538  std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
539  cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
540  Header->getContext(), Ext);
541  }
542 
543  // Recursively create the new Loop objects for nested loops, if any,
544  // to preserve LoopInfo.
545  for (Loop *ChildLoop : *L) {
546  cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
547  }
548 
549  // Hook-up the control flow for the newly inserted blocks.
550  // The new header is hooked up directly to the "top", which is either
551  // the original loop preheader (for the first iteration) or the previous
552  // iteration's exiting block (for every other iteration)
553  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
554 
555  // Similarly, for the latch:
556  // The original exiting edge is still hooked up to the loop exit.
557  // The backedge now goes to the "bottom", which is either the loop's real
558  // header (for the last peeled iteration) or the copied header of the next
559  // iteration (for every other iteration)
560  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
561  BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
562  for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
563  if (LatchBR->getSuccessor(idx) == Header) {
564  LatchBR->setSuccessor(idx, InsertBot);
565  break;
566  }
567  if (DT)
568  DT->changeImmediateDominator(InsertBot, NewLatch);
569 
570  // The new copy of the loop body starts with a bunch of PHI nodes
571  // that pick an incoming value from either the preheader, or the previous
572  // loop iteration. Since this copy is no longer part of the loop, we
573  // resolve this statically:
574  // For the first iteration, we use the value from the preheader directly.
575  // For any other iteration, we replace the phi with the value generated by
576  // the immediately preceding clone of the loop body (which represents
577  // the previous iteration).
578  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
579  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
580  if (IterNumber == 0) {
581  VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
582  } else {
583  Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
584  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
585  if (LatchInst && L->contains(LatchInst))
586  VMap[&*I] = LVMap[LatchInst];
587  else
588  VMap[&*I] = LatchVal;
589  }
590  cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
591  }
592 
593  // Fix up the outgoing values - we need to add a value for the iteration
594  // we've just created. Note that this must happen *after* the incoming
595  // values are adjusted, since the value going out of the latch may also be
596  // a value coming into the header.
597  for (auto Edge : ExitEdges)
598  for (PHINode &PHI : Edge.second->phis()) {
599  Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
600  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
601  if (LatchInst && L->contains(LatchInst))
602  LatchVal = VMap[LatchVal];
603  PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
604  }
605 
606  // LastValueMap is updated with the values for the current loop
607  // which are used the next time this function is called.
608  for (auto KV : VMap)
609  LVMap[KV.first] = KV.second;
610 }
611 
614  Optional<bool> UserAllowPeeling,
615  Optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) {
617 
618  // Set the default values.
619  PP.PeelCount = 0;
620  PP.AllowPeeling = true;
621  PP.AllowLoopNestsPeeling = false;
622  PP.PeelProfiledIterations = true;
623 
624  // Get the target specifc values.
625  TTI.getPeelingPreferences(L, SE, PP);
626 
627  // User specified values using cl::opt.
628  if (UnrollingSpecficValues) {
629  if (UnrollPeelCount.getNumOccurrences() > 0)
635  }
636 
637  // User specifed values provided by argument.
638  if (UserAllowPeeling.hasValue())
639  PP.AllowPeeling = *UserAllowPeeling;
640  if (UserAllowProfileBasedPeeling.hasValue())
641  PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
642 
643  return PP;
644 }
645 
646 /// Peel off the first \p PeelCount iterations of loop \p L.
647 ///
648 /// Note that this does not peel them off as a single straight-line block.
649 /// Rather, each iteration is peeled off separately, and needs to check the
650 /// exit condition.
651 /// For loops that dynamically execute \p PeelCount iterations or less
652 /// this provides a benefit, since the peeled off iterations, which account
653 /// for the bulk of dynamic execution, can be further simplified by scalar
654 /// optimizations.
655 bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
657  bool PreserveLCSSA) {
658  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
659  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
660 
661  LoopBlocksDFS LoopBlocks(L);
662  LoopBlocks.perform(LI);
663 
664  BasicBlock *Header = L->getHeader();
665  BasicBlock *PreHeader = L->getLoopPreheader();
666  BasicBlock *Latch = L->getLoopLatch();
668  L->getExitEdges(ExitEdges);
669 
671  if (DT) {
672  // We'd like to determine the idom of exit block after peeling one
673  // iteration.
674  // Let Exit is exit block.
675  // Let ExitingSet - is a set of predecessors of Exit block. They are exiting
676  // blocks.
677  // Let Latch' and ExitingSet' are copies after a peeling.
678  // We'd like to find an idom'(Exit) - idom of Exit after peeling.
679  // It is an evident that idom'(Exit) will be the nearest common dominator
680  // of ExitingSet and ExitingSet'.
681  // idom(Exit) is a nearest common dominator of ExitingSet.
682  // idom(Exit)' is a nearest common dominator of ExitingSet'.
683  // Taking into account that we have a single Latch, Latch' will dominate
684  // Header and idom(Exit).
685  // So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'.
686  // All these basic blocks are in the same loop, so what we find is
687  // (nearest common dominator of idom(Exit) and Latch)'.
688  // In the loop below we remember nearest common dominator of idom(Exit) and
689  // Latch to update idom of Exit later.
690  assert(L->hasDedicatedExits() && "No dedicated exits?");
691  for (auto Edge : ExitEdges) {
692  if (ExitIDom.count(Edge.second))
693  continue;
695  DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
696  assert(L->contains(BB) && "IDom is not in a loop");
697  ExitIDom[Edge.second] = BB;
698  }
699  }
700 
701  Function *F = Header->getParent();
702 
703  // Set up all the necessary basic blocks. It is convenient to split the
704  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
705  // body, and a new preheader for the "real" loop.
706 
707  // Peeling the first iteration transforms.
708  //
709  // PreHeader:
710  // ...
711  // Header:
712  // LoopBody
713  // If (cond) goto Header
714  // Exit:
715  //
716  // into
717  //
718  // InsertTop:
719  // LoopBody
720  // If (!cond) goto Exit
721  // InsertBot:
722  // NewPreHeader:
723  // ...
724  // Header:
725  // LoopBody
726  // If (cond) goto Header
727  // Exit:
728  //
729  // Each following iteration will split the current bottom anchor in two,
730  // and put the new copy of the loop body between these two blocks. That is,
731  // after peeling another iteration from the example above, we'll split
732  // InsertBot, and get:
733  //
734  // InsertTop:
735  // LoopBody
736  // If (!cond) goto Exit
737  // InsertBot:
738  // LoopBody
739  // If (!cond) goto Exit
740  // InsertBot.next:
741  // NewPreHeader:
742  // ...
743  // Header:
744  // LoopBody
745  // If (cond) goto Header
746  // Exit:
747 
748  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
749  BasicBlock *InsertBot =
750  SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
751  BasicBlock *NewPreHeader =
752  SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
753 
754  InsertTop->setName(Header->getName() + ".peel.begin");
755  InsertBot->setName(Header->getName() + ".peel.next");
756  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
757 
758  ValueToValueMapTy LVMap;
759 
760  // If we have branch weight information, we'll want to update it for the
761  // newly created branches.
762  BranchInst *LatchBR =
763  cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
764  uint64_t ExitWeight = 0, FallThroughWeight = 0;
765  initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
766 
767  // Identify what noalias metadata is inside the loop: if it is inside the
768  // loop, the associated metadata must be cloned for each iteration.
769  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
770  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
771 
772  // For each peeled-off iteration, make a copy of the loop.
773  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
775  ValueToValueMapTy VMap;
776 
777  cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
778  LoopBlocks, VMap, LVMap, DT, LI,
779  LoopLocalNoAliasDeclScopes);
780 
781  // Remap to use values from the current iteration instead of the
782  // previous one.
783  remapInstructionsInBlocks(NewBlocks, VMap);
784 
785  if (DT) {
786  // Latches of the cloned loops dominate over the loop exit, so idom of the
787  // latter is the first cloned loop body, as original PreHeader dominates
788  // the original loop body.
789  if (Iter == 0)
790  for (auto Exit : ExitIDom)
791  DT->changeImmediateDominator(Exit.first,
792  cast<BasicBlock>(LVMap[Exit.second]));
793 #ifdef EXPENSIVE_CHECKS
795 #endif
796  }
797 
798  auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
799  updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
800  // Remove Loop metadata from the latch branch instruction
801  // because it is not the Loop's latch branch anymore.
802  LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
803 
804  InsertTop = InsertBot;
805  InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
806  InsertBot->setName(Header->getName() + ".peel.next");
807 
808  F->getBasicBlockList().splice(InsertTop->getIterator(),
809  F->getBasicBlockList(),
810  NewBlocks[0]->getIterator(), F->end());
811  }
812 
813  // Now adjust the phi nodes in the loop header to get their initial values
814  // from the last peeled-off iteration instead of the preheader.
815  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
816  PHINode *PHI = cast<PHINode>(I);
817  Value *NewVal = PHI->getIncomingValueForBlock(Latch);
818  Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
819  if (LatchInst && L->contains(LatchInst))
820  NewVal = LVMap[LatchInst];
821 
822  PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
823  }
824 
825  fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
826 
827  // Update Metadata for count of peeled off iterations.
828  unsigned AlreadyPeeled = 0;
829  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
830  AlreadyPeeled = *Peeled;
831  addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
832 
833  if (Loop *ParentLoop = L->getParentLoop())
834  L = ParentLoop;
835 
836  // We modified the loop, update SE.
837  SE->forgetTopmostLoop(L);
838 
839  // Finally DomtTree must be correct.
841 
842  // FIXME: Incrementally update loop-simplify
843  simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
844 
845  NumPeeled++;
846 
847  return true;
848 }
UnrollPeelCount
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
fixupBranchWeights
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t FallThroughWeight)
Update the weights of original Latch block after peeling off all iterations.
Definition: LoopPeel.cpp:466
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:836
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
LoopSimplify.h
InfiniteIterationsToInvariance
static const unsigned InfiniteIterationsToInvariance
Definition: LoopPeel.cpp:81
Optional.h
initBranchWeights
static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t &ExitWeight, uint64_t &FallThroughWeight)
Initialize the weights.
Definition: LoopPeel.cpp:449
ValueMapper.h
llvm::peelLoop
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
Definition: LoopPeel.cpp:655
Metadata.h
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:379
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::computePeelCount
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned &TripCount, ScalarEvolution &SE, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:281
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:541
UnrollPeelMaxCount
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
calculateIterationsToInvariance
static unsigned calculateIterationsToInvariance(PHINode *Phi, Loop *L, BasicBlock *BackEdge, SmallDenseMap< PHINode *, unsigned > &IterationsToInvariance)
Definition: LoopPeel.cpp:130
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:820
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
ScalarEvolution.h
DenseMap.h
llvm::ICmpInst::isEquality
bool isEquality() const
Return true if this predicate is either EQ or NE.
Definition: Instructions.h:1298
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:535
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::LoopBase::getExitEdges
void getExitEdges(SmallVectorImpl< Edge > &ExitEdges) const
Return all pairs of (inside_block,outside_block).
Definition: LoopInfoImpl.h:148
llvm::Optional< unsigned >
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::BranchInst::getNumSuccessors
unsigned getNumSuccessors() const
Definition: Instructions.h:3151
llvm::ScalarEvolution::evaluatePredicate
Optional< bool > evaluatePredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Check whether the condition described by Pred, LHS, and RHS is true or false.
Definition: ScalarEvolution.cpp:9920
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:105
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::LoopBlocksDFS::beginRPO
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition: Instructions.h:3158
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::canPeel
bool canPeel(Loop *L)
Definition: LoopPeel.cpp:85
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
PeeledCountMetaData
static const char * PeeledCountMetaData
Definition: LoopPeel.cpp:76
Instruction.h
CommandLine.h
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1547
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:548
llvm::getOptionalIntLoopAttribute
llvm::Optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1091
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
countToEliminateCompares
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE)
Definition: LoopPeel.cpp:177
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::remapInstructionsInBlocks
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
Definition: CloneFunction.cpp:787
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
UnrollForcePeelCount
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
llvm::cloneLoop
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1446
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2810
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::Instruction
Definition: Instruction.h:45
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:9905
llvm::LoopBlocksDFS::endRPO
RPOIterator endRPO() const
Definition: LoopIterator.h:140
MDBuilder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:364
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
LoopUtils.h
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
PatternMatch.h
llvm::Instruction::extractProfMetadata
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1396
LoopIterator.h
LoopInfo.h
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4066
llvm::LoopBlocksDFS::RPOIterator
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::cloneAndAdaptNoAliasScopes
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
Definition: CloneFunction.cpp:991
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:478
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:43
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1434
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
uint64_t
llvm::addStringMetadataToLoop
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:222
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:967
I
#define I(x, y, z)
Definition: MD5.cpp:59
Cloning.h
llvm::SCEVNAryExpr::hasNoSelfWrap
bool hasNoSelfWrap() const
Definition: ScalarEvolutionExpressions.h:225
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::ScalarEvolution::getMonotonicPredicateType
Optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
Definition: ScalarEvolution.cpp:9964
llvm::LoopBlocksDFS::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1220
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:543
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::LoopBase::hasDedicatedExits
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:92
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::PHINode::setIncomingValueForBlock
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
Definition: Instructions.h:2817
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:7509
llvm::MDNode
Metadata node.
Definition: Metadata.h:901
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:539
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::identifyNoAliasScopesToClone
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
Definition: CloneFunction.cpp:1028
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:789
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:446
UnrollAllowLoopNestsPeeling
static cl::opt< bool > UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled."))
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:978
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
UnrollAllowPeeling
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
llvm::DomTreeNodeBase< BasicBlock >
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:297
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::LoopBase::getUniqueNonLatchExitBlocks
void getUniqueNonLatchExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop except successors from Latch block are not considered...
Definition: LoopInfoImpl.h:129
llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:619
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:493
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:364
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
updateBranchWeights
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t &FallThroughWeight)
Update the branch weights of the latch of a peeled-off loop iteration.
Definition: LoopPeel.cpp:425
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Casting.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:719
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:302
llvm::MDBuilder
Definition: MDBuilder.h:35
ScalarEvolutionExpressions.h
Instructions.h
SmallVector.h
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:242
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1404
Dominators.h
UnrollLoop.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:326
llvm::PHINode
Definition: Instructions.h:2625
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:379
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2419
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:225
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
LLVMContext.h
llvm::cl::desc
Definition: CommandLine.h:414
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3060
raw_ostream.h
LoopPeel.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:814
cloneLoopBlocks
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * >> &ExitEdges, SmallVectorImpl< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI, ArrayRef< MDNode * > LoopLocalNoAliasDeclScopes)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
Definition: LoopPeel.cpp:493
llvm::getLoopEstimatedTripCount
Optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:807
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:612
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3153
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:370
llvm::SCEVAddRecExpr::evaluateAtIteration
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
Definition: ScalarEvolution.cpp:1025