LLVM  14.0.0git
LoopPeel.cpp
Go to the documentation of this file.
1 //===- LoopPeel.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Loop Peeling Utilities.
10 //===----------------------------------------------------------------------===//
11 
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/Loads.h"
18 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/LLVMContext.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/PatternMatch.h"
33 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Debug.h"
43 #include <algorithm>
44 #include <cassert>
45 #include <cstdint>
46 #include <limits>
47 
48 using namespace llvm;
49 using namespace llvm::PatternMatch;
50 
51 #define DEBUG_TYPE "loop-peel"
52 
53 STATISTIC(NumPeeled, "Number of loops peeled");
54 
56  "unroll-peel-count", cl::Hidden,
57  cl::desc("Set the unroll peeling count, for testing purposes"));
58 
59 static cl::opt<bool>
60  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
61  cl::desc("Allows loops to be peeled when the dynamic "
62  "trip count is known to be low."));
63 
64 static cl::opt<bool>
65  UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
66  cl::init(false), cl::Hidden,
67  cl::desc("Allows loop nests to be peeled."));
68 
70  "unroll-peel-max-count", cl::init(7), cl::Hidden,
71  cl::desc("Max average trip count which will cause loop peeling."));
72 
74  "unroll-force-peel-count", cl::init(0), cl::Hidden,
75  cl::desc("Force a peel count regardless of profiling information."));
76 
77 static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
78 
79 // Check whether we are capable of peeling this loop.
80 bool llvm::canPeel(Loop *L) {
81  // Make sure the loop is in simplified form
82  if (!L->isLoopSimplifyForm())
83  return false;
84 
85  // Don't try to peel loops where the latch is not the exiting block.
86  // This can be an indication of two different things:
87  // 1) The loop is not rotated.
88  // 2) The loop contains irreducible control flow that involves the latch.
89  const BasicBlock *Latch = L->getLoopLatch();
90  if (!L->isLoopExiting(Latch))
91  return false;
92 
93  // Peeling is only supported if the latch is a branch.
94  if (!isa<BranchInst>(Latch->getTerminator()))
95  return false;
96 
99  // The latch must either be the only exiting block or all non-latch exit
100  // blocks have either a deopt or unreachable terminator or compose a chain of
101  // blocks where the last one is either deopt or unreachable terminated. Both
102  // deopt and unreachable terminators are a strong indication they are not
103  // taken. Note that this is a profitability check, not a legality check. Also
104  // note that LoopPeeling currently can only update the branch weights of latch
105  // blocks and branch weights to blocks with deopt or unreachable do not need
106  // updating.
108 }
109 
110 // This function calculates the number of iterations after which the given Phi
111 // becomes an invariant. The pre-calculated values are memorized in the map. The
112 // function (shortcut is I) is calculated according to the following definition:
113 // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
114 // If %y is a loop invariant, then I(%x) = 1.
115 // If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
116 // Otherwise, I(%x) is infinite.
117 // TODO: Actually if %y is an expression that depends only on Phi %z and some
118 // loop invariants, we can estimate I(%x) = I(%z) + 1. The example
119 // looks like:
120 // %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
121 // %y = phi(0, 5),
122 // %a = %y + 1.
124  PHINode *Phi, Loop *L, BasicBlock *BackEdge,
125  SmallDenseMap<PHINode *, Optional<unsigned> > &IterationsToInvariance) {
126  assert(Phi->getParent() == L->getHeader() &&
127  "Non-loop Phi should not be checked for turning into invariant.");
128  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
129  // If we already know the answer, take it from the map.
130  auto I = IterationsToInvariance.find(Phi);
131  if (I != IterationsToInvariance.end())
132  return I->second;
133 
134  // Otherwise we need to analyze the input from the back edge.
135  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
136  // Place infinity to map to avoid infinite recursion for cycled Phis. Such
137  // cycles can never stop on an invariant.
138  IterationsToInvariance[Phi] = None;
139  Optional<unsigned> ToInvariance = None;
140 
141  if (L->isLoopInvariant(Input))
142  ToInvariance = 1u;
143  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
144  // Only consider Phis in header block.
145  if (IncPhi->getParent() != L->getHeader())
146  return None;
147  // If the input becomes an invariant after X iterations, then our Phi
148  // becomes an invariant after X + 1 iterations.
149  auto InputToInvariance = calculateIterationsToInvariance(
150  IncPhi, L, BackEdge, IterationsToInvariance);
151  if (InputToInvariance)
152  ToInvariance = *InputToInvariance + 1u;
153  }
154 
155  // If we found that this Phi lies in an invariant chain, update the map.
156  if (ToInvariance)
157  IterationsToInvariance[Phi] = ToInvariance;
158  return ToInvariance;
159 }
160 
161 // Try to find any invariant memory reads that will become dereferenceable in
162 // the remainder loop after peeling. The load must also be used (transitively)
163 // by an exit condition. Returns the number of iterations to peel off (at the
164 // moment either 0 or 1).
166  DominatorTree &DT) {
167  // Skip loops with a single exiting block, because there should be no benefit
168  // for the heuristic below.
169  if (L.getExitingBlock())
170  return 0;
171 
172  // All non-latch exit blocks must have an UnreachableInst terminator.
173  // Otherwise the heuristic below may not be profitable.
176  if (any_of(Exits, [](const BasicBlock *BB) {
177  return !isa<UnreachableInst>(BB->getTerminator());
178  }))
179  return 0;
180 
181  // Now look for invariant loads that dominate the latch and are not known to
182  // be dereferenceable. If there are such loads and no writes, they will become
183  // dereferenceable in the loop if the first iteration is peeled off. Also
184  // collect the set of instructions controlled by such loads. Only peel if an
185  // exit condition uses (transitively) such a load.
186  BasicBlock *Header = L.getHeader();
187  BasicBlock *Latch = L.getLoopLatch();
188  SmallPtrSet<Value *, 8> LoadUsers;
189  const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
190  for (BasicBlock *BB : L.blocks()) {
191  for (Instruction &I : *BB) {
192  if (I.mayWriteToMemory())
193  return 0;
194 
195  auto Iter = LoadUsers.find(&I);
196  if (Iter != LoadUsers.end()) {
197  for (Value *U : I.users())
198  LoadUsers.insert(U);
199  }
200  // Do not look for reads in the header; they can already be hoisted
201  // without peeling.
202  if (BB == Header)
203  continue;
204  if (auto *LI = dyn_cast<LoadInst>(&I)) {
205  Value *Ptr = LI->getPointerOperand();
206  if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
207  !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, &DT))
208  for (Value *U : I.users())
209  LoadUsers.insert(U);
210  }
211  }
212  }
213  SmallVector<BasicBlock *> ExitingBlocks;
214  L.getExitingBlocks(ExitingBlocks);
215  if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
216  return LoadUsers.contains(Exiting->getTerminator());
217  }))
218  return 1;
219  return 0;
220 }
221 
222 // Return the number of iterations to peel off that make conditions in the
223 // body true/false. For example, if we peel 2 iterations off the loop below,
224 // the condition i < 2 can be evaluated at compile time.
225 // for (i = 0; i < n; i++)
226 // if (i < 2)
227 // ..
228 // else
229 // ..
230 // }
231 static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
232  ScalarEvolution &SE) {
233  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
234  unsigned DesiredPeelCount = 0;
235 
236  for (auto *BB : L.blocks()) {
237  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
238  if (!BI || BI->isUnconditional())
239  continue;
240 
241  // Ignore loop exit condition.
242  if (L.getLoopLatch() == BB)
243  continue;
244 
245  Value *Condition = BI->getCondition();
246  Value *LeftVal, *RightVal;
247  CmpInst::Predicate Pred;
248  if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
249  continue;
250 
251  const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
252  const SCEV *RightSCEV = SE.getSCEV(RightVal);
253 
254  // Do not consider predicates that are known to be true or false
255  // independently of the loop iteration.
256  if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
257  continue;
258 
259  // Check if we have a condition with one AddRec and one non AddRec
260  // expression. Normalize LeftSCEV to be the AddRec.
261  if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
262  if (isa<SCEVAddRecExpr>(RightSCEV)) {
263  std::swap(LeftSCEV, RightSCEV);
264  Pred = ICmpInst::getSwappedPredicate(Pred);
265  } else
266  continue;
267  }
268 
269  const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
270 
271  // Avoid huge SCEV computations in the loop below, make sure we only
272  // consider AddRecs of the loop we are trying to peel.
273  if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
274  continue;
275  if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
276  !SE.getMonotonicPredicateType(LeftAR, Pred))
277  continue;
278 
279  // Check if extending the current DesiredPeelCount lets us evaluate Pred
280  // or !Pred in the loop body statically.
281  unsigned NewPeelCount = DesiredPeelCount;
282 
283  const SCEV *IterVal = LeftAR->evaluateAtIteration(
284  SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
285 
286  // If the original condition is not known, get the negated predicate
287  // (which holds on the else branch) and check if it is known. This allows
288  // us to peel of iterations that make the original condition false.
289  if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
290  Pred = ICmpInst::getInversePredicate(Pred);
291 
292  const SCEV *Step = LeftAR->getStepRecurrence(SE);
293  const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
294  auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
295  &NewPeelCount]() {
296  IterVal = NextIterVal;
297  NextIterVal = SE.getAddExpr(IterVal, Step);
298  NewPeelCount++;
299  };
300 
301  auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
302  return NewPeelCount < MaxPeelCount;
303  };
304 
305  while (CanPeelOneMoreIteration() &&
306  SE.isKnownPredicate(Pred, IterVal, RightSCEV))
307  PeelOneMoreIteration();
308 
309  // With *that* peel count, does the predicate !Pred become known in the
310  // first iteration of the loop body after peeling?
311  if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
312  RightSCEV))
313  continue; // If not, give up.
314 
315  // However, for equality comparisons, that isn't always sufficient to
316  // eliminate the comparsion in loop body, we may need to peel one more
317  // iteration. See if that makes !Pred become unknown again.
318  if (ICmpInst::isEquality(Pred) &&
319  !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
320  RightSCEV) &&
321  !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
322  SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
323  if (!CanPeelOneMoreIteration())
324  continue; // Need to peel one more iteration, but can't. Give up.
325  PeelOneMoreIteration(); // Great!
326  }
327 
328  DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
329  }
330 
331  return DesiredPeelCount;
332 }
333 
334 /// This "heuristic" exactly matches implicit behavior which used to exist
335 /// inside getLoopEstimatedTripCount. It was added here to keep an
336 /// improvement inside that API from causing peeling to become more agressive.
337 /// This should probably be removed.
339  BasicBlock *Latch = L->getLoopLatch();
340  if (!Latch)
341  return true;
342 
343  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
344  if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
345  return true;
346 
347  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
348  LatchBR->getSuccessor(1) == L->getHeader()) &&
349  "At least one edge out of the latch must go to the header");
350 
351  SmallVector<BasicBlock *, 4> ExitBlocks;
352  L->getUniqueNonLatchExitBlocks(ExitBlocks);
353  return any_of(ExitBlocks, [](const BasicBlock *EB) {
354  return !EB->getTerminatingDeoptimizeCall();
355  });
356 }
357 
358 
359 // Return the number of iterations we want to peel off.
360 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
362  unsigned &TripCount, DominatorTree &DT,
363  ScalarEvolution &SE, unsigned Threshold) {
364  assert(LoopSize > 0 && "Zero loop size is not allowed!");
365  // Save the PP.PeelCount value set by the target in
366  // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
367  unsigned TargetPeelCount = PP.PeelCount;
368  PP.PeelCount = 0;
369  if (!canPeel(L))
370  return;
371 
372  // Only try to peel innermost loops by default.
373  // The constraint can be relaxed by the target in TTI.getUnrollingPreferences
374  // or by the flag -unroll-allow-loop-nests-peeling.
375  if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
376  return;
377 
378  // If the user provided a peel count, use that.
379  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
380  if (UserPeelCount) {
381  LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
382  << " iterations.\n");
384  PP.PeelProfiledIterations = true;
385  return;
386  }
387 
388  // Skip peeling if it's disabled.
389  if (!PP.AllowPeeling)
390  return;
391 
392  unsigned AlreadyPeeled = 0;
393  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
394  AlreadyPeeled = *Peeled;
395  // Stop if we already peeled off the maximum number of iterations.
396  if (AlreadyPeeled >= UnrollPeelMaxCount)
397  return;
398 
399  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
400  // iterations of the loop. For this we compute the number for iterations after
401  // which every Phi is guaranteed to become an invariant, and try to peel the
402  // maximum number of iterations among these values, thus turning all those
403  // Phis into invariants.
404  // First, check that we can peel at least one iteration.
405  if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) {
406  // Store the pre-calculated values here.
407  SmallDenseMap<PHINode *, Optional<unsigned> > IterationsToInvariance;
408  // Now go through all Phis to calculate their the number of iterations they
409  // need to become invariants.
410  // Start the max computation with the UP.PeelCount value set by the target
411  // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
412  unsigned DesiredPeelCount = TargetPeelCount;
413  BasicBlock *BackEdge = L->getLoopLatch();
414  assert(BackEdge && "Loop is not in simplified form?");
415  for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
416  PHINode *Phi = cast<PHINode>(&*BI);
417  auto ToInvariance = calculateIterationsToInvariance(
418  Phi, L, BackEdge, IterationsToInvariance);
419  if (ToInvariance)
420  DesiredPeelCount = std::max(DesiredPeelCount, *ToInvariance);
421  }
422 
423  // Pay respect to limitations implied by loop size and the max peel count.
424  unsigned MaxPeelCount = UnrollPeelMaxCount;
425  MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
426 
427  DesiredPeelCount = std::max(DesiredPeelCount,
428  countToEliminateCompares(*L, MaxPeelCount, SE));
429 
430  if (DesiredPeelCount == 0)
431  DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT);
432 
433  if (DesiredPeelCount > 0) {
434  DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
435  // Consider max peel count limitation.
436  assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
437  if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
438  LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
439  << " iteration(s) to turn"
440  << " some Phis into invariants.\n");
441  PP.PeelCount = DesiredPeelCount;
442  PP.PeelProfiledIterations = false;
443  return;
444  }
445  }
446  }
447 
448  // Bail if we know the statically calculated trip count.
449  // In this case we rather prefer partial unrolling.
450  if (TripCount)
451  return;
452 
453  // Do not apply profile base peeling if it is disabled.
454  if (!PP.PeelProfiledIterations)
455  return;
456  // If we don't know the trip count, but have reason to believe the average
457  // trip count is low, peeling should be beneficial, since we will usually
458  // hit the peeled section.
459  // We only do this in the presence of profile information, since otherwise
460  // our estimates of the trip count are not reliable enough.
461  if (L->getHeader()->getParent()->hasProfileData()) {
463  return;
465  if (!PeelCount)
466  return;
467 
468  LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
469  << "\n");
470 
471  if (*PeelCount) {
472  if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
473  (LoopSize * (*PeelCount + 1) <= Threshold)) {
474  LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
475  << " iterations.\n");
476  PP.PeelCount = *PeelCount;
477  return;
478  }
479  LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
480  LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
481  LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
482  LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
483  << "\n");
484  LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
485  }
486  }
487 }
488 
489 /// Update the branch weights of the latch of a peeled-off loop
490 /// iteration.
491 /// This sets the branch weights for the latch of the recently peeled off loop
492 /// iteration correctly.
493 /// Let F is a weight of the edge from latch to header.
494 /// Let E is a weight of the edge from latch to exit.
495 /// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
496 /// go to exit.
497 /// Then, Estimated TripCount = F / E.
498 /// For I-th (counting from 0) peeled off iteration we set the the weights for
499 /// the peeled latch as (TC - I, 1). It gives us reasonable distribution,
500 /// The probability to go to exit 1/(TC-I) increases. At the same time
501 /// the estimated trip count of remaining loop reduces by I.
502 /// To avoid dealing with division rounding we can just multiple both part
503 /// of weights to E and use weight as (F - I * E, E).
504 ///
505 /// \param Header The copy of the header block that belongs to next iteration.
506 /// \param LatchBR The copy of the latch branch that belongs to this iteration.
507 /// \param[in,out] FallThroughWeight The weight of the edge from latch to
508 /// header before peeling (in) and after peeled off one iteration (out).
509 static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
510  uint64_t ExitWeight,
511  uint64_t &FallThroughWeight) {
512  // FallThroughWeight is 0 means that there is no branch weights on original
513  // latch block or estimated trip count is zero.
514  if (!FallThroughWeight)
515  return;
516 
517  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
518  MDBuilder MDB(LatchBR->getContext());
519  MDNode *WeightNode =
520  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
521  : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
522  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
523  FallThroughWeight =
524  FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
525 }
526 
527 /// Initialize the weights.
528 ///
529 /// \param Header The header block.
530 /// \param LatchBR The latch branch.
531 /// \param[out] ExitWeight The weight of the edge from Latch to Exit.
532 /// \param[out] FallThroughWeight The weight of the edge from Latch to Header.
533 static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
534  uint64_t &ExitWeight,
535  uint64_t &FallThroughWeight) {
536  uint64_t TrueWeight, FalseWeight;
537  if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
538  return;
539  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
540  ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
541  FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;
542 }
543 
544 /// Update the weights of original Latch block after peeling off all iterations.
545 ///
546 /// \param Header The header block.
547 /// \param LatchBR The latch branch.
548 /// \param ExitWeight The weight of the edge from Latch to Exit.
549 /// \param FallThroughWeight The weight of the edge from Latch to Header.
550 static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
551  uint64_t ExitWeight,
552  uint64_t FallThroughWeight) {
553  // FallThroughWeight is 0 means that there is no branch weights on original
554  // latch block or estimated trip count is zero.
555  if (!FallThroughWeight)
556  return;
557 
558  // Sets the branch weights on the loop exit.
559  MDBuilder MDB(LatchBR->getContext());
560  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
561  MDNode *WeightNode =
562  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
563  : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
564  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
565 }
566 
567 /// Clones the body of the loop L, putting it between \p InsertTop and \p
568 /// InsertBot.
569 /// \param IterNumber The serial number of the iteration currently being
570 /// peeled off.
571 /// \param ExitEdges The exit edges of the original loop.
572 /// \param[out] NewBlocks A list of the blocks in the newly created clone
573 /// \param[out] VMap The value map between the loop and the new clone.
574 /// \param LoopBlocks A helper for DFS-traversal of the loop.
575 /// \param LVMap A value-map that maps instructions from the original loop to
576 /// instructions in the last peeled-off iteration.
577 static void cloneLoopBlocks(
578  Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
579  SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
580  SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
582  LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
583  BasicBlock *Header = L->getHeader();
584  BasicBlock *Latch = L->getLoopLatch();
585  BasicBlock *PreHeader = L->getLoopPreheader();
586 
587  Function *F = Header->getParent();
588  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
589  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
590  Loop *ParentLoop = L->getParentLoop();
591 
592  // For each block in the original loop, create a new copy,
593  // and update the value map with the newly created values.
594  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
595  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
596  NewBlocks.push_back(NewBB);
597 
598  // If an original block is an immediate child of the loop L, its copy
599  // is a child of a ParentLoop after peeling. If a block is a child of
600  // a nested loop, it is handled in the cloneLoop() call below.
601  if (ParentLoop && LI->getLoopFor(*BB) == L)
602  ParentLoop->addBasicBlockToLoop(NewBB, *LI);
603 
604  VMap[*BB] = NewBB;
605 
606  // If dominator tree is available, insert nodes to represent cloned blocks.
607  if (DT) {
608  if (Header == *BB)
609  DT->addNewBlock(NewBB, InsertTop);
610  else {
611  DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
612  // VMap must contain entry for IDom, as the iteration order is RPO.
613  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
614  }
615  }
616  }
617 
618  {
619  // Identify what other metadata depends on the cloned version. After
620  // cloning, replace the metadata with the corrected version for both
621  // memory instructions and noalias intrinsics.
622  std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
623  cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
624  Header->getContext(), Ext);
625  }
626 
627  // Recursively create the new Loop objects for nested loops, if any,
628  // to preserve LoopInfo.
629  for (Loop *ChildLoop : *L) {
630  cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
631  }
632 
633  // Hook-up the control flow for the newly inserted blocks.
634  // The new header is hooked up directly to the "top", which is either
635  // the original loop preheader (for the first iteration) or the previous
636  // iteration's exiting block (for every other iteration)
637  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
638 
639  // Similarly, for the latch:
640  // The original exiting edge is still hooked up to the loop exit.
641  // The backedge now goes to the "bottom", which is either the loop's real
642  // header (for the last peeled iteration) or the copied header of the next
643  // iteration (for every other iteration)
644  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
645  BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
646  for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
647  if (LatchBR->getSuccessor(idx) == Header) {
648  LatchBR->setSuccessor(idx, InsertBot);
649  break;
650  }
651  if (DT)
652  DT->changeImmediateDominator(InsertBot, NewLatch);
653 
654  // The new copy of the loop body starts with a bunch of PHI nodes
655  // that pick an incoming value from either the preheader, or the previous
656  // loop iteration. Since this copy is no longer part of the loop, we
657  // resolve this statically:
658  // For the first iteration, we use the value from the preheader directly.
659  // For any other iteration, we replace the phi with the value generated by
660  // the immediately preceding clone of the loop body (which represents
661  // the previous iteration).
662  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
663  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
664  if (IterNumber == 0) {
665  VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
666  } else {
667  Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
668  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
669  if (LatchInst && L->contains(LatchInst))
670  VMap[&*I] = LVMap[LatchInst];
671  else
672  VMap[&*I] = LatchVal;
673  }
674  cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
675  }
676 
677  // Fix up the outgoing values - we need to add a value for the iteration
678  // we've just created. Note that this must happen *after* the incoming
679  // values are adjusted, since the value going out of the latch may also be
680  // a value coming into the header.
681  for (auto Edge : ExitEdges)
682  for (PHINode &PHI : Edge.second->phis()) {
683  Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
684  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
685  if (LatchInst && L->contains(LatchInst))
686  LatchVal = VMap[LatchVal];
687  PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
688  }
689 
690  // LastValueMap is updated with the values for the current loop
691  // which are used the next time this function is called.
692  for (auto KV : VMap)
693  LVMap[KV.first] = KV.second;
694 }
695 
698  Optional<bool> UserAllowPeeling,
699  Optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) {
701 
702  // Set the default values.
703  PP.PeelCount = 0;
704  PP.AllowPeeling = true;
705  PP.AllowLoopNestsPeeling = false;
706  PP.PeelProfiledIterations = true;
707 
708  // Get the target specifc values.
709  TTI.getPeelingPreferences(L, SE, PP);
710 
711  // User specified values using cl::opt.
712  if (UnrollingSpecficValues) {
713  if (UnrollPeelCount.getNumOccurrences() > 0)
719  }
720 
721  // User specifed values provided by argument.
722  if (UserAllowPeeling.hasValue())
723  PP.AllowPeeling = *UserAllowPeeling;
724  if (UserAllowProfileBasedPeeling.hasValue())
725  PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
726 
727  return PP;
728 }
729 
730 /// Peel off the first \p PeelCount iterations of loop \p L.
731 ///
732 /// Note that this does not peel them off as a single straight-line block.
733 /// Rather, each iteration is peeled off separately, and needs to check the
734 /// exit condition.
735 /// For loops that dynamically execute \p PeelCount iterations or less
736 /// this provides a benefit, since the peeled off iterations, which account
737 /// for the bulk of dynamic execution, can be further simplified by scalar
738 /// optimizations.
739 bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
741  bool PreserveLCSSA) {
742  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
743  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
744 
745  LoopBlocksDFS LoopBlocks(L);
746  LoopBlocks.perform(LI);
747 
748  BasicBlock *Header = L->getHeader();
749  BasicBlock *PreHeader = L->getLoopPreheader();
750  BasicBlock *Latch = L->getLoopLatch();
752  L->getExitEdges(ExitEdges);
753 
754  // Remember dominators of blocks we might reach through exits to change them
755  // later. Immediate dominator of such block might change, because we add more
756  // routes which can lead to the exit: we can reach it from the peeled
757  // iterations too.
758  DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
759  if (DT) {
760  for (auto *BB : L->blocks()) {
761  auto *BBDomNode = DT->getNode(BB);
762  SmallVector<BasicBlock *, 16> ChildrenToUpdate;
763  for (auto *ChildDomNode : BBDomNode->children()) {
764  auto *ChildBB = ChildDomNode->getBlock();
765  if (!L->contains(ChildBB))
766  ChildrenToUpdate.push_back(ChildBB);
767  }
768  // The new idom of the block will be the nearest common dominator
769  // of all copies of the previous idom. This is equivalent to the
770  // nearest common dominator of the previous idom and the first latch,
771  // which dominates all copies of the previous idom.
772  BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latch);
773  for (auto *ChildBB : ChildrenToUpdate)
774  NonLoopBlocksIDom[ChildBB] = NewIDom;
775  }
776  }
777 
778  Function *F = Header->getParent();
779 
780  // Set up all the necessary basic blocks. It is convenient to split the
781  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
782  // body, and a new preheader for the "real" loop.
783 
784  // Peeling the first iteration transforms.
785  //
786  // PreHeader:
787  // ...
788  // Header:
789  // LoopBody
790  // If (cond) goto Header
791  // Exit:
792  //
793  // into
794  //
795  // InsertTop:
796  // LoopBody
797  // If (!cond) goto Exit
798  // InsertBot:
799  // NewPreHeader:
800  // ...
801  // Header:
802  // LoopBody
803  // If (cond) goto Header
804  // Exit:
805  //
806  // Each following iteration will split the current bottom anchor in two,
807  // and put the new copy of the loop body between these two blocks. That is,
808  // after peeling another iteration from the example above, we'll split
809  // InsertBot, and get:
810  //
811  // InsertTop:
812  // LoopBody
813  // If (!cond) goto Exit
814  // InsertBot:
815  // LoopBody
816  // If (!cond) goto Exit
817  // InsertBot.next:
818  // NewPreHeader:
819  // ...
820  // Header:
821  // LoopBody
822  // If (cond) goto Header
823  // Exit:
824 
825  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
826  BasicBlock *InsertBot =
827  SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
828  BasicBlock *NewPreHeader =
829  SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
830 
831  InsertTop->setName(Header->getName() + ".peel.begin");
832  InsertBot->setName(Header->getName() + ".peel.next");
833  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
834 
835  ValueToValueMapTy LVMap;
836 
837  // If we have branch weight information, we'll want to update it for the
838  // newly created branches.
839  BranchInst *LatchBR =
840  cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
841  uint64_t ExitWeight = 0, FallThroughWeight = 0;
842  initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
843 
844  // Identify what noalias metadata is inside the loop: if it is inside the
845  // loop, the associated metadata must be cloned for each iteration.
846  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
847  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
848 
849  // For each peeled-off iteration, make a copy of the loop.
850  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
852  ValueToValueMapTy VMap;
853 
854  cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
855  LoopBlocks, VMap, LVMap, DT, LI,
856  LoopLocalNoAliasDeclScopes);
857 
858  // Remap to use values from the current iteration instead of the
859  // previous one.
860  remapInstructionsInBlocks(NewBlocks, VMap);
861 
862  if (DT) {
863  // Update IDoms of the blocks reachable through exits.
864  if (Iter == 0)
865  for (auto BBIDom : NonLoopBlocksIDom)
866  DT->changeImmediateDominator(BBIDom.first,
867  cast<BasicBlock>(LVMap[BBIDom.second]));
868 #ifdef EXPENSIVE_CHECKS
870 #endif
871  }
872 
873  auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
874  updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
875  // Remove Loop metadata from the latch branch instruction
876  // because it is not the Loop's latch branch anymore.
877  LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
878 
879  InsertTop = InsertBot;
880  InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
881  InsertBot->setName(Header->getName() + ".peel.next");
882 
883  F->getBasicBlockList().splice(InsertTop->getIterator(),
884  F->getBasicBlockList(),
885  NewBlocks[0]->getIterator(), F->end());
886  }
887 
888  // Now adjust the phi nodes in the loop header to get their initial values
889  // from the last peeled-off iteration instead of the preheader.
890  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
891  PHINode *PHI = cast<PHINode>(I);
892  Value *NewVal = PHI->getIncomingValueForBlock(Latch);
893  Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
894  if (LatchInst && L->contains(LatchInst))
895  NewVal = LVMap[LatchInst];
896 
897  PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
898  }
899 
900  fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
901 
902  // Update Metadata for count of peeled off iterations.
903  unsigned AlreadyPeeled = 0;
904  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
905  AlreadyPeeled = *Peeled;
906  addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
907 
908  if (Loop *ParentLoop = L->getParentLoop())
909  L = ParentLoop;
910 
911  // We modified the loop, update SE.
912  SE->forgetTopmostLoop(L);
913 
914  // Finally DomtTree must be correct.
916 
917  // FIXME: Incrementally update loop-simplify
918  simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
919 
920  NumPeeled++;
921 
922  return true;
923 }
UnrollPeelCount
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
llvm::BasicBlock::getTerminatingDeoptimizeCall
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:189
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
fixupBranchWeights
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t FallThroughWeight)
Update the weights of original Latch block after peeling off all iterations.
Definition: LoopPeel.cpp:550
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:851
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
LoopSimplify.h
Optional.h
initBranchWeights
static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t &ExitWeight, uint64_t &FallThroughWeight)
Initialize the weights.
Definition: LoopPeel.cpp:533
ValueMapper.h
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::peelLoop
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
Definition: LoopPeel.cpp:739
Metadata.h
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:721
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:370
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
Loads.h
violatesLegacyMultiExitLoopCheck
static bool violatesLegacyMultiExitLoopCheck(Loop *L)
This "heuristic" exactly matches implicit behavior which used to exist inside getLoopEstimatedTripCou...
Definition: LoopPeel.cpp:338
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
calculateIterationsToInvariance
static Optional< unsigned > calculateIterationsToInvariance(PHINode *Phi, Loop *L, BasicBlock *BackEdge, SmallDenseMap< PHINode *, Optional< unsigned > > &IterationsToInvariance)
Definition: LoopPeel.cpp:123
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
Statistic.h
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:544
UnrollPeelMaxCount
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::computePeelCount
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned &TripCount, DominatorTree &DT, ScalarEvolution &SE, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:360
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:835
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
ScalarEvolution.h
DenseMap.h
llvm::ICmpInst::isEquality
bool isEquality() const
Return true if this predicate is either EQ or NE.
Definition: Instructions.h:1285
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:538
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::LoopBase::getExitEdges
void getExitEdges(SmallVectorImpl< Edge > &ExitEdges) const
Return all pairs of (inside_block,outside_block).
Definition: LoopInfoImpl.h:148
llvm::Optional< unsigned >
llvm::BranchInst::getNumSuccessors
unsigned getNumSuccessors() const
Definition: Instructions.h:3183
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ScalarEvolution::evaluatePredicate
Optional< bool > evaluatePredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Check whether the condition described by Pred, LHS, and RHS is true or false.
Definition: ScalarEvolution.cpp:10241
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
peelToTurnInvariantLoadsDerefencebale
static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, DominatorTree &DT)
Definition: LoopPeel.cpp:165
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::LoopBlocksDFS::beginRPO
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition: Instructions.h:3190
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::canPeel
bool canPeel(Loop *L)
Definition: LoopPeel.cpp:80
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
PeeledCountMetaData
static const char * PeeledCountMetaData
Definition: LoopPeel.cpp:77
Instruction.h
CommandLine.h
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1649
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:551
llvm::getOptionalIntLoopAttribute
llvm::Optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1092
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
countToEliminateCompares
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE)
Definition: LoopPeel.cpp:231
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::remapInstructionsInBlocks
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
Definition: CloneFunction.cpp:787
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
UnrollForcePeelCount
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
llvm::cloneLoop
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1495
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2842
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:10226
llvm::Instruction
Definition: Instruction.h:45
llvm::LoopBlocksDFS::endRPO
RPOIterator endRPO() const
Definition: LoopIterator.h:140
llvm::isDereferenceablePointer
bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:234
MDBuilder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:376
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:402
LoopUtils.h
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:148
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
PatternMatch.h
llvm::Instruction::extractProfMetadata
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1405
llvm::None
const NoneType None
Definition: None.h:23
LoopIterator.h
LoopInfo.h
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4339
llvm::LoopBlocksDFS::RPOIterator
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::cloneAndAdaptNoAliasScopes
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
Definition: CloneFunction.cpp:991
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:479
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:43
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::SmallPtrSetImpl::find
iterator find(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:385
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
uint64_t
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
llvm::addStringMetadataToLoop
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:223
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:970
I
#define I(x, y, z)
Definition: MD5.cpp:58
Cloning.h
llvm::SCEVNAryExpr::hasNoSelfWrap
bool hasNoSelfWrap() const
Definition: ScalarEvolutionExpressions.h:225
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::ScalarEvolution::getMonotonicPredicateType
Optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
Definition: ScalarEvolution.cpp:10285
llvm::LoopBlocksDFS::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1221
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:546
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::PHINode::setIncomingValueForBlock
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
Definition: Instructions.h:2849
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:7907
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:542
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1656
llvm::identifyNoAliasScopesToClone
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
Definition: CloneFunction.cpp:1028
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:799
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:450
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
UnrollAllowLoopNestsPeeling
static cl::opt< bool > UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled."))
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
UnrollAllowPeeling
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
llvm::DomTreeNodeBase< BasicBlock >
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::LoopBase::getUniqueNonLatchExitBlocks
void getUniqueNonLatchExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop except successors from Latch block are not considered...
Definition: LoopInfoImpl.h:129
llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:619
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:518
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
updateBranchWeights
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t &FallThroughWeight)
Update the branch weights of the latch of a peeled-off loop iteration.
Definition: LoopPeel.cpp:509
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:718
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:292
llvm::MDBuilder
Definition: MDBuilder.h:35
ScalarEvolutionExpressions.h
Instructions.h
llvm::IsBlockFollowedByDeoptOrUnreachable
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
Definition: BasicBlockUtils.cpp:499
SmallVector.h
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:242
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1404
Dominators.h
UnrollLoop.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:331
llvm::PHINode
Definition: Instructions.h:2657
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:381
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2440
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:225
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
LLVMContext.h
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3092
raw_ostream.h
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:388
LoopPeel.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:839
cloneLoopBlocks
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * >> &ExitEdges, SmallVectorImpl< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI, ArrayRef< MDNode * > LoopLocalNoAliasDeclScopes)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
Definition: LoopPeel.cpp:577
llvm::getLoopEstimatedTripCount
Optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:825
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:696
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3185
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::SCEVAddRecExpr::evaluateAtIteration
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
Definition: ScalarEvolution.cpp:1030
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364