LLVM  9.0.0svn
LoopUnrollPeel.cpp
Go to the documentation of this file.
1 //===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements some loop unrolling utilities for peeling loops
10 // with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for
11 // unrolling loops with compile-time constant trip counts.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/Optional.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/IR/BasicBlock.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/InstrTypes.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include "llvm/IR/Metadata.h"
33 #include "llvm/IR/PatternMatch.h"
34 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/Debug.h"
44 #include <algorithm>
45 #include <cassert>
46 #include <cstdint>
47 #include <limits>
48 
49 using namespace llvm;
50 using namespace llvm::PatternMatch;
51 
52 #define DEBUG_TYPE "loop-unroll"
53 
54 STATISTIC(NumPeeled, "Number of loops peeled");
55 
57  "unroll-peel-max-count", cl::init(7), cl::Hidden,
58  cl::desc("Max average trip count which will cause loop peeling."));
59 
61  "unroll-force-peel-count", cl::init(0), cl::Hidden,
62  cl::desc("Force a peel count regardless of profiling information."));
63 
64 // Designates that a Phi is estimated to become invariant after an "infinite"
65 // number of loop iterations (i.e. only may become an invariant if the loop is
66 // fully unrolled).
67 static const unsigned InfiniteIterationsToInvariance =
69 
70 // Check whether we are capable of peeling this loop.
71 bool llvm::canPeel(Loop *L) {
72  // Make sure the loop is in simplified form
73  if (!L->isLoopSimplifyForm())
74  return false;
75 
76  // Only peel loops that contain a single exit
77  if (!L->getExitingBlock() || !L->getUniqueExitBlock())
78  return false;
79 
80  // Don't try to peel loops where the latch is not the exiting block.
81  // This can be an indication of two different things:
82  // 1) The loop is not rotated.
83  // 2) The loop contains irreducible control flow that involves the latch.
84  if (L->getLoopLatch() != L->getExitingBlock())
85  return false;
86 
87  return true;
88 }
89 
90 // This function calculates the number of iterations after which the given Phi
91 // becomes an invariant. The pre-calculated values are memorized in the map. The
92 // function (shortcut is I) is calculated according to the following definition:
93 // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
94 // If %y is a loop invariant, then I(%x) = 1.
95 // If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
96 // Otherwise, I(%x) is infinite.
97 // TODO: Actually if %y is an expression that depends only on Phi %z and some
98 // loop invariants, we can estimate I(%x) = I(%z) + 1. The example
99 // looks like:
100 // %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
101 // %y = phi(0, 5),
102 // %a = %y + 1.
104  PHINode *Phi, Loop *L, BasicBlock *BackEdge,
105  SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
106  assert(Phi->getParent() == L->getHeader() &&
107  "Non-loop Phi should not be checked for turning into invariant.");
108  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
109  // If we already know the answer, take it from the map.
110  auto I = IterationsToInvariance.find(Phi);
111  if (I != IterationsToInvariance.end())
112  return I->second;
113 
114  // Otherwise we need to analyze the input from the back edge.
115  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
116  // Place infinity to map to avoid infinite recursion for cycled Phis. Such
117  // cycles can never stop on an invariant.
118  IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
119  unsigned ToInvariance = InfiniteIterationsToInvariance;
120 
121  if (L->isLoopInvariant(Input))
122  ToInvariance = 1u;
123  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
124  // Only consider Phis in header block.
125  if (IncPhi->getParent() != L->getHeader())
126  return InfiniteIterationsToInvariance;
127  // If the input becomes an invariant after X iterations, then our Phi
128  // becomes an invariant after X + 1 iterations.
129  unsigned InputToInvariance = calculateIterationsToInvariance(
130  IncPhi, L, BackEdge, IterationsToInvariance);
131  if (InputToInvariance != InfiniteIterationsToInvariance)
132  ToInvariance = InputToInvariance + 1u;
133  }
134 
135  // If we found that this Phi lies in an invariant chain, update the map.
136  if (ToInvariance != InfiniteIterationsToInvariance)
137  IterationsToInvariance[Phi] = ToInvariance;
138  return ToInvariance;
139 }
140 
141 // Return the number of iterations to peel off that make conditions in the
142 // body true/false. For example, if we peel 2 iterations off the loop below,
143 // the condition i < 2 can be evaluated at compile time.
144 // for (i = 0; i < n; i++)
145 // if (i < 2)
146 // ..
147 // else
148 // ..
149 // }
150 static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
151  ScalarEvolution &SE) {
152  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
153  unsigned DesiredPeelCount = 0;
154 
155  for (auto *BB : L.blocks()) {
156  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
157  if (!BI || BI->isUnconditional())
158  continue;
159 
160  // Ignore loop exit condition.
161  if (L.getLoopLatch() == BB)
162  continue;
163 
164  Value *Condition = BI->getCondition();
165  Value *LeftVal, *RightVal;
166  CmpInst::Predicate Pred;
167  if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
168  continue;
169 
170  const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
171  const SCEV *RightSCEV = SE.getSCEV(RightVal);
172 
173  // Do not consider predicates that are known to be true or false
174  // independently of the loop iteration.
175  if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) ||
177  RightSCEV))
178  continue;
179 
180  // Check if we have a condition with one AddRec and one non AddRec
181  // expression. Normalize LeftSCEV to be the AddRec.
182  if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
183  if (isa<SCEVAddRecExpr>(RightSCEV)) {
184  std::swap(LeftSCEV, RightSCEV);
185  Pred = ICmpInst::getSwappedPredicate(Pred);
186  } else
187  continue;
188  }
189 
190  const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
191 
192  // Avoid huge SCEV computations in the loop below, make sure we only
193  // consider AddRecs of the loop we are trying to peel and avoid
194  // non-monotonic predicates, as we will not be able to simplify the loop
195  // body.
196  // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can
197  // simplify the loop, if we peel 1 additional iteration, if there
198  // is no wrapping.
199  bool Increasing;
200  if (!LeftAR->isAffine() || LeftAR->getLoop() != &L ||
201  !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
202  continue;
203  (void)Increasing;
204 
205  // Check if extending the current DesiredPeelCount lets us evaluate Pred
206  // or !Pred in the loop body statically.
207  unsigned NewPeelCount = DesiredPeelCount;
208 
209  const SCEV *IterVal = LeftAR->evaluateAtIteration(
210  SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
211 
212  // If the original condition is not known, get the negated predicate
213  // (which holds on the else branch) and check if it is known. This allows
214  // us to peel of iterations that make the original condition false.
215  if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
216  Pred = ICmpInst::getInversePredicate(Pred);
217 
218  const SCEV *Step = LeftAR->getStepRecurrence(SE);
219  while (NewPeelCount < MaxPeelCount &&
220  SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
221  IterVal = SE.getAddExpr(IterVal, Step);
222  NewPeelCount++;
223  }
224 
225  // Only peel the loop if the monotonic predicate !Pred becomes known in the
226  // first iteration of the loop body after peeling.
227  if (NewPeelCount > DesiredPeelCount &&
229  RightSCEV))
230  DesiredPeelCount = NewPeelCount;
231  }
232 
233  return DesiredPeelCount;
234 }
235 
236 // Return the number of iterations we want to peel off.
237 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
239  unsigned &TripCount, ScalarEvolution &SE) {
240  assert(LoopSize > 0 && "Zero loop size is not allowed!");
241  // Save the UP.PeelCount value set by the target in
242  // TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
243  unsigned TargetPeelCount = UP.PeelCount;
244  UP.PeelCount = 0;
245  if (!canPeel(L))
246  return;
247 
248  // Only try to peel innermost loops.
249  if (!L->empty())
250  return;
251 
252  // If the user provided a peel count, use that.
253  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
254  if (UserPeelCount) {
255  LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
256  << " iterations.\n");
258  return;
259  }
260 
261  // Skip peeling if it's disabled.
262  if (!UP.AllowPeeling)
263  return;
264 
265  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
266  // iterations of the loop. For this we compute the number for iterations after
267  // which every Phi is guaranteed to become an invariant, and try to peel the
268  // maximum number of iterations among these values, thus turning all those
269  // Phis into invariants.
270  // First, check that we can peel at least one iteration.
271  if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
272  // Store the pre-calculated values here.
273  SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
274  // Now go through all Phis to calculate their the number of iterations they
275  // need to become invariants.
276  // Start the max computation with the UP.PeelCount value set by the target
277  // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
278  unsigned DesiredPeelCount = TargetPeelCount;
279  BasicBlock *BackEdge = L->getLoopLatch();
280  assert(BackEdge && "Loop is not in simplified form?");
281  for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
282  PHINode *Phi = cast<PHINode>(&*BI);
283  unsigned ToInvariance = calculateIterationsToInvariance(
284  Phi, L, BackEdge, IterationsToInvariance);
285  if (ToInvariance != InfiniteIterationsToInvariance)
286  DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
287  }
288 
289  // Pay respect to limitations implied by loop size and the max peel count.
290  unsigned MaxPeelCount = UnrollPeelMaxCount;
291  MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
292 
293  DesiredPeelCount = std::max(DesiredPeelCount,
294  countToEliminateCompares(*L, MaxPeelCount, SE));
295 
296  if (DesiredPeelCount > 0) {
297  DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
298  // Consider max peel count limitation.
299  assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
300  LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
301  << " iteration(s) to turn"
302  << " some Phis into invariants.\n");
303  UP.PeelCount = DesiredPeelCount;
304  return;
305  }
306  }
307 
308  // Bail if we know the statically calculated trip count.
309  // In this case we rather prefer partial unrolling.
310  if (TripCount)
311  return;
312 
313  // If we don't know the trip count, but have reason to believe the average
314  // trip count is low, peeling should be beneficial, since we will usually
315  // hit the peeled section.
316  // We only do this in the presence of profile information, since otherwise
317  // our estimates of the trip count are not reliable enough.
318  if (L->getHeader()->getParent()->hasProfileData()) {
320  if (!PeelCount)
321  return;
322 
323  LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
324  << "\n");
325 
326  if (*PeelCount) {
327  if ((*PeelCount <= UnrollPeelMaxCount) &&
328  (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
329  LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
330  << " iterations.\n");
331  UP.PeelCount = *PeelCount;
332  return;
333  }
334  LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
335  LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
336  LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
337  << "\n");
338  LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
339  }
340  }
341 }
342 
343 /// Update the branch weights of the latch of a peeled-off loop
344 /// iteration.
345 /// This sets the branch weights for the latch of the recently peeled off loop
346 /// iteration correctly.
347 /// Our goal is to make sure that:
348 /// a) The total weight of all the copies of the loop body is preserved.
349 /// b) The total weight of the loop exit is preserved.
350 /// c) The body weight is reasonably distributed between the peeled iterations.
351 ///
352 /// \param Header The copy of the header block that belongs to next iteration.
353 /// \param LatchBR The copy of the latch branch that belongs to this iteration.
354 /// \param IterNumber The serial number of the iteration that was just
355 /// peeled off.
356 /// \param AvgIters The average number of iterations we expect the loop to have.
357 /// \param[in,out] PeeledHeaderWeight The total number of dynamic loop
358 /// iterations that are unaccounted for. As an input, it represents the number
359 /// of times we expect to enter the header of the iteration currently being
360 /// peeled off. The output is the number of times we expect to enter the
361 /// header of the next iteration.
362 static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
363  unsigned IterNumber, unsigned AvgIters,
364  uint64_t &PeeledHeaderWeight) {
365  // FIXME: Pick a more realistic distribution.
366  // Currently the proportion of weight we assign to the fall-through
367  // side of the branch drops linearly with the iteration number, and we use
368  // a 0.9 fudge factor to make the drop-off less sharp...
369  if (PeeledHeaderWeight) {
370  uint64_t FallThruWeight =
371  PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
372  uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
373  PeeledHeaderWeight -= ExitWeight;
374 
375  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
376  MDBuilder MDB(LatchBR->getContext());
377  MDNode *WeightNode =
378  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
379  : MDB.createBranchWeights(FallThruWeight, ExitWeight);
380  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
381  }
382 }
383 
384 /// Initialize the weights.
385 ///
386 /// \param Header The header block.
387 /// \param LatchBR The latch branch.
388 /// \param AvgIters The average number of iterations we expect the loop to have.
389 /// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken.
390 /// \param[out] CurHeaderWeight The # of times the header is executed.
391 static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
392  unsigned AvgIters, uint64_t &ExitWeight,
393  uint64_t &CurHeaderWeight) {
394  uint64_t TrueWeight, FalseWeight;
395  if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
396  return;
397  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
398  ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
399  // The # of times the loop body executes is the sum of the exit block
400  // is taken and the # of times the backedges are taken.
401  CurHeaderWeight = TrueWeight + FalseWeight;
402 }
403 
404 /// Update the weights of original Latch block after peeling off all iterations.
405 ///
406 /// \param Header The header block.
407 /// \param LatchBR The latch branch.
408 /// \param ExitWeight The weight of the edge from Latch to Exit block.
409 /// \param CurHeaderWeight The # of time the header is executed.
410 static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
411  uint64_t ExitWeight, uint64_t CurHeaderWeight) {
412  // Adjust the branch weights on the loop exit.
413  if (ExitWeight) {
414  // The backedge count is the difference of current header weight and
415  // current loop exit weight. If the current header weight is smaller than
416  // the current loop exit weight, we mark the loop backedge weight as 1.
417  uint64_t BackEdgeWeight = 0;
418  if (ExitWeight < CurHeaderWeight)
419  BackEdgeWeight = CurHeaderWeight - ExitWeight;
420  else
421  BackEdgeWeight = 1;
422  MDBuilder MDB(LatchBR->getContext());
423  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
424  MDNode *WeightNode =
425  HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
426  : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
427  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
428  }
429 }
430 
431 /// Clones the body of the loop L, putting it between \p InsertTop and \p
432 /// InsertBot.
433 /// \param IterNumber The serial number of the iteration currently being
434 /// peeled off.
435 /// \param ExitEdges The exit edges of the original loop.
436 /// \param[out] NewBlocks A list of the blocks in the newly created clone
437 /// \param[out] VMap The value map between the loop and the new clone.
438 /// \param LoopBlocks A helper for DFS-traversal of the loop.
439 /// \param LVMap A value-map that maps instructions from the original loop to
440 /// instructions in the last peeled-off iteration.
441 static void cloneLoopBlocks(
442  Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
443  SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges,
444  SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
446  LoopInfo *LI) {
447  BasicBlock *Header = L->getHeader();
448  BasicBlock *Latch = L->getLoopLatch();
449  BasicBlock *PreHeader = L->getLoopPreheader();
450 
451  Function *F = Header->getParent();
452  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
453  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
454  Loop *ParentLoop = L->getParentLoop();
455 
456  // For each block in the original loop, create a new copy,
457  // and update the value map with the newly created values.
458  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
459  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
460  NewBlocks.push_back(NewBB);
461 
462  if (ParentLoop)
463  ParentLoop->addBasicBlockToLoop(NewBB, *LI);
464 
465  VMap[*BB] = NewBB;
466 
467  // If dominator tree is available, insert nodes to represent cloned blocks.
468  if (DT) {
469  if (Header == *BB)
470  DT->addNewBlock(NewBB, InsertTop);
471  else {
472  DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
473  // VMap must contain entry for IDom, as the iteration order is RPO.
474  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
475  }
476  }
477  }
478 
479  // Hook-up the control flow for the newly inserted blocks.
480  // The new header is hooked up directly to the "top", which is either
481  // the original loop preheader (for the first iteration) or the previous
482  // iteration's exiting block (for every other iteration)
483  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
484 
485  // Similarly, for the latch:
486  // The original exiting edge is still hooked up to the loop exit.
487  // The backedge now goes to the "bottom", which is either the loop's real
488  // header (for the last peeled iteration) or the copied header of the next
489  // iteration (for every other iteration)
490  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
491  BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
492  for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
493  if (LatchBR->getSuccessor(idx) == Header) {
494  LatchBR->setSuccessor(idx, InsertBot);
495  break;
496  }
497  if (DT)
498  DT->changeImmediateDominator(InsertBot, NewLatch);
499 
500  // The new copy of the loop body starts with a bunch of PHI nodes
501  // that pick an incoming value from either the preheader, or the previous
502  // loop iteration. Since this copy is no longer part of the loop, we
503  // resolve this statically:
504  // For the first iteration, we use the value from the preheader directly.
505  // For any other iteration, we replace the phi with the value generated by
506  // the immediately preceding clone of the loop body (which represents
507  // the previous iteration).
508  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
509  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
510  if (IterNumber == 0) {
511  VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
512  } else {
513  Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
514  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
515  if (LatchInst && L->contains(LatchInst))
516  VMap[&*I] = LVMap[LatchInst];
517  else
518  VMap[&*I] = LatchVal;
519  }
520  cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
521  }
522 
523  // Fix up the outgoing values - we need to add a value for the iteration
524  // we've just created. Note that this must happen *after* the incoming
525  // values are adjusted, since the value going out of the latch may also be
526  // a value coming into the header.
527  for (auto Edge : ExitEdges)
528  for (PHINode &PHI : Edge.second->phis()) {
529  Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
530  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
531  if (LatchInst && L->contains(LatchInst))
532  LatchVal = VMap[LatchVal];
533  PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
534  }
535 
536  // LastValueMap is updated with the values for the current loop
537  // which are used the next time this function is called.
538  for (const auto &KV : VMap)
539  LVMap[KV.first] = KV.second;
540 }
541 
542 /// Peel off the first \p PeelCount iterations of loop \p L.
543 ///
544 /// Note that this does not peel them off as a single straight-line block.
545 /// Rather, each iteration is peeled off separately, and needs to check the
546 /// exit condition.
547 /// For loops that dynamically execute \p PeelCount iterations or less
548 /// this provides a benefit, since the peeled off iterations, which account
549 /// for the bulk of dynamic execution, can be further simplified by scalar
550 /// optimizations.
551 bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
553  AssumptionCache *AC, bool PreserveLCSSA) {
554  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
555  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
556 
557  LoopBlocksDFS LoopBlocks(L);
558  LoopBlocks.perform(LI);
559 
560  BasicBlock *Header = L->getHeader();
561  BasicBlock *PreHeader = L->getLoopPreheader();
562  BasicBlock *Latch = L->getLoopLatch();
564  L->getExitEdges(ExitEdges);
565 
566  Function *F = Header->getParent();
567 
568  // Set up all the necessary basic blocks. It is convenient to split the
569  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
570  // body, and a new preheader for the "real" loop.
571 
572  // Peeling the first iteration transforms.
573  //
574  // PreHeader:
575  // ...
576  // Header:
577  // LoopBody
578  // If (cond) goto Header
579  // Exit:
580  //
581  // into
582  //
583  // InsertTop:
584  // LoopBody
585  // If (!cond) goto Exit
586  // InsertBot:
587  // NewPreHeader:
588  // ...
589  // Header:
590  // LoopBody
591  // If (cond) goto Header
592  // Exit:
593  //
594  // Each following iteration will split the current bottom anchor in two,
595  // and put the new copy of the loop body between these two blocks. That is,
596  // after peeling another iteration from the example above, we'll split
597  // InsertBot, and get:
598  //
599  // InsertTop:
600  // LoopBody
601  // If (!cond) goto Exit
602  // InsertBot:
603  // LoopBody
604  // If (!cond) goto Exit
605  // InsertBot.next:
606  // NewPreHeader:
607  // ...
608  // Header:
609  // LoopBody
610  // If (cond) goto Header
611  // Exit:
612 
613  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
614  BasicBlock *InsertBot =
615  SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
616  BasicBlock *NewPreHeader =
617  SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
618 
619  InsertTop->setName(Header->getName() + ".peel.begin");
620  InsertBot->setName(Header->getName() + ".peel.next");
621  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
622 
623  ValueToValueMapTy LVMap;
624 
625  // If we have branch weight information, we'll want to update it for the
626  // newly created branches.
627  BranchInst *LatchBR =
628  cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
629  uint64_t ExitWeight = 0, CurHeaderWeight = 0;
630  initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight);
631 
632  // For each peeled-off iteration, make a copy of the loop.
633  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
635  ValueToValueMapTy VMap;
636 
637  // Subtract the exit weight from the current header weight -- the exit
638  // weight is exactly the weight of the previous iteration's header.
639  // FIXME: due to the way the distribution is constructed, we need a
640  // guard here to make sure we don't end up with non-positive weights.
641  if (ExitWeight < CurHeaderWeight)
642  CurHeaderWeight -= ExitWeight;
643  else
644  CurHeaderWeight = 1;
645 
646  cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
647  LoopBlocks, VMap, LVMap, DT, LI);
648 
649  // Remap to use values from the current iteration instead of the
650  // previous one.
651  remapInstructionsInBlocks(NewBlocks, VMap);
652 
653  if (DT) {
654  // Latches of the cloned loops dominate over the loop exit, so idom of the
655  // latter is the first cloned loop body, as original PreHeader dominates
656  // the original loop body.
657  if (Iter == 0)
658  for (auto Edge : ExitEdges)
659  DT->changeImmediateDominator(Edge.second,
660  cast<BasicBlock>(LVMap[Edge.first]));
661 #ifdef EXPENSIVE_CHECKS
663 #endif
664  }
665 
666  auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
667  updateBranchWeights(InsertBot, LatchBRCopy, Iter,
668  PeelCount, ExitWeight);
669  // Remove Loop metadata from the latch branch instruction
670  // because it is not the Loop's latch branch anymore.
671  LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
672 
673  InsertTop = InsertBot;
674  InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
675  InsertBot->setName(Header->getName() + ".peel.next");
676 
677  F->getBasicBlockList().splice(InsertTop->getIterator(),
678  F->getBasicBlockList(),
679  NewBlocks[0]->getIterator(), F->end());
680  }
681 
682  // Now adjust the phi nodes in the loop header to get their initial values
683  // from the last peeled-off iteration instead of the preheader.
684  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
685  PHINode *PHI = cast<PHINode>(I);
686  Value *NewVal = PHI->getIncomingValueForBlock(Latch);
687  Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
688  if (LatchInst && L->contains(LatchInst))
689  NewVal = LVMap[LatchInst];
690 
691  PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
692  }
693 
694  fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight);
695 
696  if (Loop *ParentLoop = L->getParentLoop())
697  L = ParentLoop;
698 
699  // We modified the loop, update SE.
700  SE->forgetTopmostLoop(L);
701 
702  // FIXME: Incrementally update loop-simplify
703  simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
704 
705  NumPeeled++;
706 
707  return true;
708 }
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock *> > &ExitEdges, SmallVectorImpl< BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:244
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator end()
Definition: Function.h:682
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
void push_back(const T &Elt)
Definition: SmallVector.h:211
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:193
A cache of @llvm.assume calls within a function.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:732
BasicBlock * getSuccessor(unsigned i) const
bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, bool &Increasing)
Return true if, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or...
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:863
F(f)
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the edge connecting specified block.
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
static unsigned calculateIterationsToInvariance(PHINode *Phi, Loop *L, BasicBlock *BackEdge, SmallDenseMap< PHINode *, unsigned > &IterationsToInvariance)
static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, unsigned AvgIters, uint64_t &ExitWeight, uint64_t &CurHeaderWeight)
Initialize the weights.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:831
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, unsigned IterNumber, unsigned AvgIters, uint64_t &PeeledHeaderWeight)
Update the branch weights of the latch of a peeled-off loop iteration.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
bool AllowPeeling
Allow peeling off loop iterations for loops with low dynamic tripcount.
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
RPOIterator endRPO() const
Definition: LoopIterator.h:140
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t CurHeaderWeight)
Update the weights of original Latch block after peeling off all iterations.
BlockT * getHeader() const
Definition: LoopInfo.h:102
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:270
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1061
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
NodeT * getBlock() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
Conditional or Unconditional Branch instruction.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
DomTreeNodeBase * getIDom() const
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE)
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:327
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
self_iterator getIterator()
Definition: ilist_node.h:81
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:623
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:308
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:164
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1222
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void getExitEdges(SmallVectorImpl< Edge > &ExitEdges) const
Return all pairs of (inside_block,outside_block).
Definition: LoopInfoImpl.h:174
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:112
Iterator for intrusive lists based on ilist_node.
Type * getType() const
Return the LLVM type of this SCEV expression.
static const unsigned InfiniteIterationsToInvariance
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool canPeel(Loop *L)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
Store the result of a depth first search within basic blocks contained by a single loop...
Definition: LoopIterator.h:97
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
LoopT * getParentLoop() const
Definition: LoopInfo.h:103
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:425
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:510
Parameters that control the generic loop unrolling transformation.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:657
bool empty() const
Definition: LoopInfo.h:148
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:72
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:847
This pass exposes codegen information to IR-level passes.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1311
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:158
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
const BasicBlock * getParent() const
Definition: Instruction.h:66
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
void forgetTopmostLoop(const Loop *L)
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock *> &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.