LLVM 22.0.0git — LoopPeel.cpp (doxygen source listing; hyperlinked declaration lines were lost during extraction, so some statements below are visibly truncated)
1//===- LoopPeel.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loop Peeling Utilities.
10//===----------------------------------------------------------------------===//
11
13#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/Statistic.h"
16#include "llvm/Analysis/Loads.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/Dominators.h"
25#include "llvm/IR/Function.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/Instruction.h"
29#include "llvm/IR/LLVMContext.h"
30#include "llvm/IR/MDBuilder.h"
35#include "llvm/Support/Debug.h"
43#include <algorithm>
44#include <cassert>
45#include <cstdint>
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::PatternMatch;
50using namespace llvm::SCEVPatternMatch;
51
52#define DEBUG_TYPE "loop-peel"
53
54STATISTIC(NumPeeled, "Number of loops peeled");
55STATISTIC(NumPeeledEnd, "Number of loops peeled from end");
56
57namespace llvm {
// Command-line knobs controlling the peeling heuristics. Several of the
// cl::opt declaration header lines (the "static cl::opt<T> Name(" part) are
// missing from this extract — restore from upstream before compiling.
// NOTE(review): missing declaration line here (orig 58): UnrollPeelCount.
59 "unroll-peel-count", cl::Hidden,
60 cl::desc("Set the unroll peeling count, for testing purposes"));
61
62static cl::opt<bool>
63 UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
64 cl::desc("Allows loops to be peeled when the dynamic "
65 "trip count is known to be low."));
66
67static cl::opt<bool>
68 UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
69 cl::init(false), cl::Hidden,
70 cl::desc("Allows loop nests to be peeled."));
71
// NOTE(review): missing declaration line here (orig 72): UnrollPeelMaxCount.
73 "unroll-peel-max-count", cl::init(7), cl::Hidden,
74 cl::desc("Max average trip count which will cause loop peeling."));
75
// NOTE(review): missing declaration line here (orig 76): UnrollForcePeelCount.
77 "unroll-force-peel-count", cl::init(0), cl::Hidden,
78 cl::desc("Force a peel count regardless of profiling information."));
79
// NOTE(review): missing declaration line here (orig 80): DisableAdvancedPeeling.
81 "disable-advanced-peeling", cl::init(false), cl::Hidden,
83 "Disable advance peeling. Issues for convergent targets (D134803)."));
84
// NOTE(review): missing declaration line here (orig 85): EnablePeelingForIV.
86 "enable-peeling-for-iv", cl::init(false), cl::Hidden,
87 cl::desc("Enable peeling to convert Phi nodes into IVs"))
// Metadata key used to record how many iterations were already peeled off a
// loop, so repeated runs respect the overall UnrollPeelMaxCount budget.
89static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
90
92} // namespace llvm
93
94// Check whether we are capable of peeling this loop.
// Legality gate used by all peeling entry points; also folds in a
// profitability restriction on non-latch exits (see comment below).
95bool llvm::canPeel(const Loop *L) {
96 // Make sure the loop is in simplified form
97 if (!L->isLoopSimplifyForm())
98 return false;
// NOTE(review): the guard preceding this early return (orig 99, presumably
// "if (!DisableAdvancedPeeling)") is missing from this extract.
100 return true;
// NOTE(review): missing declaration here (orig 102): the Exits SmallVector.
103 L->getUniqueNonLatchExitBlocks(Exits);
104 // The latch must either be the only exiting block or all non-latch exit
105 // blocks have either a deopt or unreachable terminator or compose a chain of
106 // blocks where the last one is either deopt or unreachable terminated. Both
107 // deopt and unreachable terminators are a strong indication they are not
108 // taken. Note that this is a profitability check, not a legality check. Also
109 // note that LoopPeeling currently can only update the branch weights of latch
110 // blocks and branch weights to blocks with deopt or unreachable do not need
111 // updating.
// NOTE(review): the final return expression (orig 112) is missing here.
113}
114
115namespace {
116
117// As a loop is peeled, it may be the case that Phi nodes become
118// loop-invariant (ie, known because there is only one choice).
119// For example, consider the following function:
120// void g(int);
121// void binary() {
122// int x = 0;
123// int y = 0;
124// int a = 0;
125// for(int i = 0; i <100000; ++i) {
126// g(x);
127// x = y;
128// g(a);
129// y = a + 1;
130// a = 5;
131// }
132// }
133// Peeling 3 iterations is beneficial because the values for x, y and a
134// become known. The IR for this loop looks something like the following:
135//
136// %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
137// %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
138// %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
139// %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
140// ...
141// tail call void @_Z1gi(i32 signext %x)
142// tail call void @_Z1gi(i32 signext %a)
143// %add = add nuw nsw i32 %a, 1
144// %inc = add nuw nsw i32 %i, 1
145// %exitcond = icmp eq i32 %inc, 100000
146// br i1 %exitcond, label %for.cond.cleanup, label %for.body
147//
148// The arguments for the calls to g will become known after 3 iterations
149// of the loop, because the phi nodes values become known after 3 iterations
150// of the loop (ie, they are known on the 4th iteration, so peel 3 iterations).
151// The first iteration has g(0), g(0); the second has g(0), g(5); the
152// third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
153// Now consider the phi nodes:
154// %a is a phi with constants so it is determined after iteration 1.
155// %y is a phi based on a constant and %a so it is determined on
156// the iteration after %a is determined, so iteration 2.
157// %x is a phi based on a constant and %y so it is determined on
158// the iteration after %y, so iteration 3.
159// %i is based on itself (and is an induction variable) so it is
160// never determined.
161// This means that peeling off 3 iterations will result in being able to
162// remove the phi nodes for %a, %y, and %x. The arguments for the
163// corresponding calls to g are determined and the code for computing
164// x, y, and a can be removed.
165//
166// Similarly, there are cases where peeling makes Phi nodes loop-inductions
167// (i.e., the value is increased or decreased by a fixed amount on every
168// iteration). For example, consider the following function.
169//
170// #define N 100
171// void f(int a[], int b[]) {
172// int im = N - 1;
173// for (int i = 0; i < N; i++) {
174// a[i] = b[i] + b[im];
175// im = i;
176// }
177// }
178//
179// The IR of the loop will look something like the following.
180//
181// %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
182// %im = phi i32 [ 99, %entry ], [ %i, %for.body ]
183// ...
184// %i.next = add nuw nsw i32 %i, 1
185// ...
186//
187// In this case, %im becomes a loop-induction variable by peeling 1 iteration,
188// because %i is a loop-induction one. The peeling count can be determined by
189// the same algorithm with loop-invariant case. Such peeling is profitable for
190// loop-vectorization.
191//
192// The PhiAnalyzer class calculates how many times a loop should be
193// peeled based on the above analysis of the phi nodes in the loop while
194// respecting the maximum specified.
195class PhiAnalyzer {
196public:
197 PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV);
198
199 // Calculate the sufficient minimum number of iterations of the loop to peel
200 // such that phi instructions become determined (subject to allowable limits)
201 std::optional<unsigned> calculateIterationsToPeel();
202
203protected:
// What a value eventually becomes once enough iterations are peeled:
// a loop invariant, or a (lightweight-detected) induction variable.
204 enum class PeelCounterType {
205 Invariant,
206 Induction,
207 };
208
// A counter pairs "iterations to peel" with the kind the value becomes.
209 using PeelCounterValue = std::pair<unsigned, PeelCounterType>;
210 using PeelCounter = std::optional<PeelCounterValue>;
// Sentinel: the value never becomes determined, or exceeds MaxIterations.
211 const PeelCounter Unknown = std::nullopt;
212
213 // Add 1 respecting Unknown and return Unknown if result over MaxIterations
214 PeelCounter addOne(PeelCounter PC) const {
215 if (PC == Unknown)
216 return Unknown;
217 auto [Val, Ty] = *PC;
218 return (Val + 1 <= MaxIterations) ? PeelCounter({Val + 1, Ty}) : Unknown;
219 }
220
221 // Return a value representing zero for the given counter type.
222 PeelCounter makeZero(PeelCounterType Ty) const {
223 return PeelCounter({0, Ty});
224 }
225
226 // Calculate the number of iterations after which the given value becomes an
227 // invariant or an induction.
228 PeelCounter calculate(const Value &);
229
230 // Auxiliary function to calculate the number of iterations for a comparison
231 // instruction or a binary operator.
232 PeelCounter mergeTwoCounters(const Instruction &CmpOrBinaryOp,
233 const PeelCounterValue &LHS,
234 const PeelCounterValue &RHS) const;
235
236 // Returns true if the \p Phi is an induction in the target loop. This is a
237 // lightweight check and possible to detect an IV in some cases.
238 bool isInductionPHI(const PHINode *Phi) const;
239
240 const Loop &L;
241 const unsigned MaxIterations;
242 const bool PeelForIV;
243
244 // Map of Values to number of iterations to invariance or induction
// Also doubles as the recursion guard in calculate(): an entry is seeded
// with Unknown before a value's operands are visited.
245 SmallDenseMap<const Value *, PeelCounter> IterationsToInvarianceOrInduction;
246};
247
// Constructor: callers must only build the analyzer for loops that already
// passed canPeel() and with a non-zero peeling budget.
248PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV)
249 : L(L), MaxIterations(MaxIterations), PeelForIV(PeelForIV) {
250 assert(canPeel(&L) && "loop is not suitable for peeling");
251 assert(MaxIterations > 0 && "no peeling is allowed?");
252}
253
254/// Test whether \p Phi is an induction variable. Although this can be
255/// determined using SCEV analysis, it is expensive to compute here. Instead,
256/// we perform cheaper checks that may not detect complex cases but are
257/// sufficient for some situations.
258bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
259 // Currently we only support a loop that has single latch.
260 BasicBlock *Latch = L.getLoopLatch();
261 if (Latch == nullptr)
262 return false;
263
264 Value *Cur = Phi->getIncomingValueForBlock(Latch);
265 SmallPtrSet<Value *, 4> Visited;
266 bool VisitBinOp = false;
267
268 // Starting from the incoming value of the Phi, we follow the use-def chain.
269 // We consider Phi to be an IV if we can reach it again by traversing only
270 // add, sub, or cast instructions.
271 while (true) {
272 if (Cur == Phi)
273 break;
274
275 // Avoid infinite loop.
276 if (!Visited.insert(Cur).second)
277 return false;
278
279 auto *I = dyn_cast<Instruction>(Cur);
280 if (!I || !L.contains(I))
281 return false;
282
283 if (auto *Cast = dyn_cast<CastInst>(I)) {
284 Cur = Cast->getOperand(0);
285 } else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
286 if (BinOp->getOpcode() != Instruction::Add &&
287 BinOp->getOpcode() != Instruction::Sub)
288 return false;
289 if (!isa<ConstantInt>(BinOp->getOperand(1)))
290 return false;
291
292 VisitBinOp = true;
293 Cur = BinOp->getOperand(0);
294 } else {
295 return false;
296 }
297 }
298
299 // Ignore cases where no binary operations are visited.
300 return VisitBinOp;
301}
302
303/// When either \p LHS or \p RHS is an IV, the result of \p CmpOrBinaryOp is
304/// considered an IV only if it is an addition or a subtraction. Otherwise the
305/// result can be a value that is neither a loop-invariant nor an IV.
306///
307/// If both \p LHS and \p RHS are loop-invariants, then the result of
308/// \CmpOrBinaryOp is also a loop-invariant.
309PhiAnalyzer::PeelCounter
310PhiAnalyzer::mergeTwoCounters(const Instruction &CmpOrBinaryOp,
311 const PeelCounterValue &LHS,
312 const PeelCounterValue &RHS) const {
313 auto &[LVal, LTy] = LHS;
314 auto &[RVal, RTy] = RHS;
315 unsigned NewVal = std::max(LVal, RVal);
316
317 if (LTy == PeelCounterType::Induction || RTy == PeelCounterType::Induction) {
318 if (const auto *BinOp = dyn_cast<BinaryOperator>(&CmpOrBinaryOp)) {
319 if (BinOp->getOpcode() == Instruction::Add ||
320 BinOp->getOpcode() == Instruction::Sub)
321 return PeelCounter({NewVal, PeelCounterType::Induction});
322 }
323 return Unknown;
324 }
325 return PeelCounter({NewVal, PeelCounterType::Invariant});
326}
327
328// This function calculates the number of iterations after which the value
329// becomes an invariant. The pre-calculated values are memorized in a map.
330// N.B. This number will be Unknown or <= MaxIterations.
331// The function is calculated according to the following definition:
332// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
333// F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown)
334// G(%y) = 0 if %y is a loop invariant
335// G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
336// G(%y) = TODO: if %y is an expression based on phis and loop invariants
337// The example looks like:
338// %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration.
339// %y = phi(0, 5)
340// %a = %y + 1
341// G(%y) = Unknown otherwise (including phi not in header block)
342PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
343 // If we already know the answer, take it from the map.
344 // Otherwise, place Unknown to map to avoid infinite recursion. Such
345 // cycles can never stop on an invariant.
346 auto [I, Inserted] =
347 IterationsToInvarianceOrInduction.try_emplace(&V, Unknown);
348 if (!Inserted)
349 return I->second;
350
// NOTE: after any recursive calculate() call the iterator I may be
// invalidated (the DenseMap can grow), which is why the code below always
// re-indexes with operator[] instead of reusing I.
351 if (L.isLoopInvariant(&V))
352 // Loop invariant so known at start.
353 return (IterationsToInvarianceOrInduction[&V] =
354 makeZero(PeelCounterType::Invariant));
355 if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
356 if (Phi->getParent() != L.getHeader()) {
357 // Phi is not in header block so Unknown.
358 assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
359 "unexpected value saved");
360 return Unknown;
361 }
362
363 // If Phi is an induction, register it as a starting point.
364 if (PeelForIV && isInductionPHI(Phi))
365 return (IterationsToInvarianceOrInduction[&V] =
366 makeZero(PeelCounterType::Induction));
367
368 // We need to analyze the input from the back edge and add 1.
369 Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
370 PeelCounter Iterations = calculate(*Input);
// The recursive call above created/updated the entry for Input, so
// indexing here cannot insert a fresh element.
371 assert(IterationsToInvarianceOrInduction[Input] == Iterations &&
372 "unexpected value saved");
373 return (IterationsToInvarianceOrInduction[Phi] = addOne(Iterations));
374 }
375 if (const Instruction *I = dyn_cast<Instruction>(&V)) {
376 if (isa<CmpInst>(I) || I->isBinaryOp()) {
377 // Binary instructions get the max of the operands.
378 PeelCounter LHS = calculate(*I->getOperand(0));
379 if (LHS == Unknown)
380 return Unknown;
381 PeelCounter RHS = calculate(*I->getOperand(1));
382 if (RHS == Unknown)
383 return Unknown;
384 return (IterationsToInvarianceOrInduction[I] =
385 mergeTwoCounters(*I, *LHS, *RHS));
386 }
387 if (I->isCast())
388 // Cast instructions get the value of the operand.
389 return (IterationsToInvarianceOrInduction[I] =
390 calculate(*I->getOperand(0)));
391 }
392 // TODO: handle more expressions
393
394 // Everything else is Unknown.
395 assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
396 "unexpected value saved");
397 return Unknown;
398}
399
400std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
401 unsigned Iterations = 0;
402 for (auto &PHI : L.getHeader()->phis()) {
403 PeelCounter ToInvarianceOrInduction = calculate(PHI);
404 if (ToInvarianceOrInduction != Unknown) {
405 unsigned Val = ToInvarianceOrInduction->first;
406 assert(Val <= MaxIterations && "bad result in phi analysis");
407 Iterations = std::max(Iterations, Val);
408 if (Iterations == MaxIterations)
409 break;
410 }
411 }
412 assert((Iterations <= MaxIterations) && "bad result in phi analysis");
413 return Iterations ? std::optional<unsigned>(Iterations) : std::nullopt;
414}
415
416} // unnamed namespace
417
418// Try to find any invariant memory reads that will become dereferenceable in
419// the remainder loop after peeling. The load must also be used (transitively)
420// by an exit condition. Returns the number of iterations to peel off (at the
421// moment either 0 or 1).
// NOTE(review): the function header line (orig 422) is missing from this
// extract; the two lines below are its trailing parameters.
423 DominatorTree &DT,
424 AssumptionCache *AC) {
425 // Skip loops with a single exiting block, because there should be no benefit
426 // for the heuristic below.
427 if (L.getExitingBlock())
428 return 0;
429
430 // All non-latch exit blocks must have an UnreachableInst terminator.
431 // Otherwise the heuristic below may not be profitable.
// NOTE(review): missing declaration here (orig 432): the Exits SmallVector.
433 L.getUniqueNonLatchExitBlocks(Exits);
434 if (any_of(Exits, [](const BasicBlock *BB) {
435 return !isa<UnreachableInst>(BB->getTerminator());
436 }))
437 return 0;
438
439 // Now look for invariant loads that dominate the latch and are not known to
440 // be dereferenceable. If there are such loads and no writes, they will become
441 // dereferenceable in the loop if the first iteration is peeled off. Also
442 // collect the set of instructions controlled by such loads. Only peel if an
443 // exit condition uses (transitively) such a load.
444 BasicBlock *Header = L.getHeader();
445 BasicBlock *Latch = L.getLoopLatch();
446 SmallPtrSet<Value *, 8> LoadUsers;
447 const DataLayout &DL = L.getHeader()->getDataLayout();
448 for (BasicBlock *BB : L.blocks()) {
449 for (Instruction &I : *BB) {
// Any may-write in the loop invalidates the "load stays invariant"
// reasoning, so bail out entirely.
450 if (I.mayWriteToMemory())
451 return 0;
452
// Propagate trackedness: users of a tracked instruction are tracked,
// which (in RPO-ish block order) approximates the transitive closure.
453 if (LoadUsers.contains(&I))
454 LoadUsers.insert_range(I.users());
455 // Do not look for reads in the header; they can already be hoisted
456 // without peeling.
457 if (BB == Header)
458 continue;
459 if (auto *LI = dyn_cast<LoadInst>(&I)) {
460 Value *Ptr = LI->getPointerOperand();
461 if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
462 !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, AC, &DT))
463 LoadUsers.insert_range(I.users());
464 }
465 }
466 }
// Only worth peeling if an exit branch (transitively) depends on such a load.
467 SmallVector<BasicBlock *> ExitingBlocks;
468 L.getExitingBlocks(ExitingBlocks);
469 if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
470 return LoadUsers.contains(Exiting->getTerminator());
471 }))
472 return 1;
473 return 0;
474}
 475
// Structural check that the latch-exit condition is of a shape the
// last-iteration peeling codegen knows how to rewrite (see comment below).
// NOTE(review): the function signature line (orig 476, presumably
// "bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {")
// is missing from this extract.
477 const SCEV *BTC = SE.getBackedgeTakenCount(&L);
// NOTE(review): the guard before this early return (orig 478, presumably an
// isa<SCEVCouldNotCompute>(BTC) check) is missing here.
479 return false;
480
481 // Check if the exit condition of the loop can be adjusted by the peeling
482 // codegen. For now, it must
483 // * exit via the latch,
484 // * the exit condition must be a NE/EQ compare of an induction with step
485 // of 1 and must only be used by the exiting branch.
486 BasicBlock *Latch = L.getLoopLatch();
487 Value *Inc;
488 Value *Bound;
489 CmpPredicate Pred;
490 BasicBlock *Succ1;
491 BasicBlock *Succ2;
492 return Latch && Latch == L.getExitingBlock() &&
493 match(Latch->getTerminator(),
494 m_Br(m_OneUse(m_ICmp(Pred, m_Value(Inc), m_Value(Bound))),
495 m_BasicBlock(Succ1), m_BasicBlock(Succ2))) &&
496 ((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader()) ||
497 (Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader())) &&
498 Bound->getType()->isIntegerTy() &&
499 SE.isLoopInvariant(SE.getSCEV(Bound), &L) &&
500 match(SE.getSCEV(Inc),
// NOTE(review): the SCEV pattern closing this match (orig 501) is missing.
502}
503
504/// Returns true if the last iteration can be peeled off and the condition (Pred
505/// LeftAR, RightSCEV) is known at the last iteration and the inverse condition
506/// is known at the second-to-last.
// NOTE(review): the function signature line (orig 507) is missing from this
// extract; the three lines below are its trailing parameters.
508 const SCEVAddRecExpr *LeftAR,
509 const SCEV *RightSCEV, ScalarEvolution &SE,
510 const TargetTransformInfo &TTI) {
511 if (!canPeelLastIteration(L, SE))
512 return false;
513
514 const SCEV *BTC = SE.getBackedgeTakenCount(&L);
515 SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel");
// The backedge-taken count must be provably non-zero, or at least cheaply
// materializable in the preheader so a runtime check can be emitted.
516 if (!SE.isKnownNonZero(BTC) &&
// NOTE(review): the expansion-cost/safety call (orig 517, presumably
// Expander.isSafeToExpandAt / isHighCostExpansion) is missing here.
518 L.getLoopPredecessor()->getTerminator()))
519 return false;
520
// Tighten both sides with dominating loop-guard information before the
// last/second-to-last iteration predicates are evaluated.
521 auto Guards = ScalarEvolution::LoopGuards::collect(&L, SE);
522 BTC = SE.applyLoopGuards(BTC, Guards);
523 RightSCEV = SE.applyLoopGuards(RightSCEV, Guards);
524 const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
525 const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
526 SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
527
528 return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter,
529 RightSCEV) &&
530 SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV);
531}
532
533// Return the number of iterations to peel off from the beginning and end of the
534// loop respectively, that make conditions in the body true/false. For example,
535// if we peel 2 iterations off the loop below, the condition i < 2 can be
536// evaluated at compile time.
537//
538// for (i = 0; i < n; i++)
539// if (i < 2)
540// ..
541// else
542// ..
543// }
544static std::pair<unsigned, unsigned>
545countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
546 const TargetTransformInfo &TTI) {
547 assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
548 unsigned DesiredPeelCount = 0;
549 unsigned DesiredPeelCountLast = 0;
550
551 // Do not peel the entire loop.
552 const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(&L);
553 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(BE))
554 MaxPeelCount =
555 std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount);
556
557 // Increase PeelCount while (IterVal Pred BoundSCEV) condition is satisfied;
558 // return true if inversed condition become known before reaching the
559 // MaxPeelCount limit.
560 auto PeelWhilePredicateIsKnown =
561 [&](unsigned &PeelCount, const SCEV *&IterVal, const SCEV *BoundSCEV,
562 const SCEV *Step, ICmpInst::Predicate Pred) {
563 while (PeelCount < MaxPeelCount &&
564 SE.isKnownPredicate(Pred, IterVal, BoundSCEV)) {
565 IterVal = SE.getAddExpr(IterVal, Step);
566 ++PeelCount;
567 }
568 return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
569 BoundSCEV);
570 };
571
// Recursion is bounded so and/or trees cannot blow up analysis time.
572 const unsigned MaxDepth = 4;
573 std::function<void(Value *, unsigned)> ComputePeelCount =
574 [&](Value *Condition, unsigned Depth) -> void {
575 if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth)
576 return;
577
578 Value *LeftVal, *RightVal;
579 if (match(Condition, m_And(m_Value(LeftVal), m_Value(RightVal))) ||
580 match(Condition, m_Or(m_Value(LeftVal), m_Value(RightVal)))) {
581 ComputePeelCount(LeftVal, Depth + 1);
582 ComputePeelCount(RightVal, Depth + 1);
583 return;
584 }
585
586 CmpPredicate Pred;
587 if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
588 return;
589
590 const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
591 const SCEV *RightSCEV = SE.getSCEV(RightVal);
592
593 // Do not consider predicates that are known to be true or false
594 // independently of the loop iteration.
595 if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
596 return;
597
598 // Check if we have a condition with one AddRec and one non AddRec
599 // expression. Normalize LeftSCEV to be the AddRec.
600 if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
601 if (isa<SCEVAddRecExpr>(RightSCEV)) {
602 std::swap(LeftSCEV, RightSCEV);
// NOTE(review): the predicate swap (orig 603, presumably
// "Pred = ICmpInst::getSwappedPredicate(Pred);") is missing here.
604 } else
605 return;
606 }
607
608 const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
609
610 // Avoid huge SCEV computations in the loop below, make sure we only
611 // consider AddRecs of the loop we are trying to peel.
612 if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
613 return;
614 if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
615 !SE.getMonotonicPredicateType(LeftAR, Pred))
616 return;
617
618 // Check if extending the current DesiredPeelCount lets us evaluate Pred
619 // or !Pred in the loop body statically.
620 unsigned NewPeelCount = DesiredPeelCount;
621
622 const SCEV *IterVal = LeftAR->evaluateAtIteration(
623 SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
624
625 // If the original condition is not known, get the negated predicate
626 // (which holds on the else branch) and check if it is known. This allows
627 // us to peel of iterations that make the original condition false.
628 if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
// NOTE(review): the body of this if (orig 629-630, presumably
// "Pred = ICmpInst::getInversePredicate(Pred);") is missing here.
630
631 const SCEV *Step = LeftAR->getStepRecurrence(SE);
632 if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
633 Pred)) {
634 if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
635 DesiredPeelCountLast = 1;
636 return;
637 }
638
639 // However, for equality comparisons, that isn't always sufficient to
640 // eliminate the comparsion in loop body, we may need to peel one more
641 // iteration. See if that makes !Pred become unknown again.
642 const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
643 if (ICmpInst::isEquality(Pred) &&
644 !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
645 RightSCEV) &&
646 !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
647 SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
648 if (NewPeelCount >= MaxPeelCount)
649 return; // Need to peel one more iteration, but can't. Give up.
650 ++NewPeelCount; // Great!
651 }
652
653 DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
654 DesiredPeelCountLast = std::max(DesiredPeelCountLast, NewPeelCount);
655 };
656
// Peeling can also resolve a min/max whose one operand is loop-invariant and
// the other an affine AddRec: peel until the strict comparison is decided.
657 auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) {
658 if (!MinMax->getType()->isIntegerTy())
659 return;
660 Value *LHS = MinMax->getLHS(), *RHS = MinMax->getRHS();
661 const SCEV *BoundSCEV, *IterSCEV;
662 if (L.isLoopInvariant(LHS)) {
663 BoundSCEV = SE.getSCEV(LHS);
664 IterSCEV = SE.getSCEV(RHS);
665 } else if (L.isLoopInvariant(RHS)) {
666 BoundSCEV = SE.getSCEV(RHS);
667 IterSCEV = SE.getSCEV(LHS);
668 } else
669 return;
670 const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IterSCEV);
671 // For simplicity, we support only affine recurrences.
672 if (!AddRec || !AddRec->isAffine() || AddRec->getLoop() != &L)
673 return;
674 const SCEV *Step = AddRec->getStepRecurrence(SE);
675 bool IsSigned = MinMax->isSigned();
676 // To minimize number of peeled iterations, we use strict relational
677 // predicates here.
// NOTE(review): the declaration of Pred (orig 678, presumably
// "ICmpInst::Predicate Pred;") is missing here.
679 if (SE.isKnownPositive(Step))
680 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
681 else if (SE.isKnownNegative(Step))
682 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
683 else
684 return;
685 // Check that AddRec is not wrapping.
686 if (!(IsSigned ? AddRec->hasNoSignedWrap() : AddRec->hasNoUnsignedWrap()))
687 return;
688 unsigned NewPeelCount = DesiredPeelCount;
689 const SCEV *IterVal = AddRec->evaluateAtIteration(
690 SE.getConstant(AddRec->getType(), NewPeelCount), SE);
691 if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step,
692 Pred)) {
693 if (shouldPeelLastIteration(L, Pred, AddRec, BoundSCEV, SE, TTI))
694 DesiredPeelCountLast = 1;
695 return;
696 }
697 DesiredPeelCount = NewPeelCount;
698 };
699
700 for (BasicBlock *BB : L.blocks()) {
701 for (Instruction &I : *BB) {
// NOTE(review): the dyn_cast introducing SI (orig 702, a SelectInst check)
// is missing from this extract.
703 ComputePeelCount(SI->getCondition(), 0);
// NOTE(review): the dyn_cast introducing MinMax (orig 704, a
// MinMaxIntrinsic check) is missing from this extract.
705 ComputePeelCountMinMax(MinMax);
706 }
707
708 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
709 if (!BI || BI->isUnconditional())
710 continue;
711
712 // Ignore loop exit condition.
713 if (L.getLoopLatch() == BB)
714 continue;
715
716 ComputePeelCount(BI->getCondition(), 0);
717 }
718
719 return {DesiredPeelCount, DesiredPeelCountLast};
720}
721
722/// This "heuristic" exactly matches implicit behavior which used to exist
723/// inside getLoopEstimatedTripCount. It was added here to keep an
724/// improvement inside that API from causing peeling to become more aggressive.
725/// This should probably be removed.
// NOTE(review): the function signature line (orig 726, presumably
// "static bool violatesLegacyMultiExitLoopCheck(Loop *L) {") is missing.
727 BasicBlock *Latch = L->getLoopLatch();
728 if (!Latch)
729 return true;
730
731 BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
732 if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
733 return true;
734
735 assert((LatchBR->getSuccessor(0) == L->getHeader() ||
736 LatchBR->getSuccessor(1) == L->getHeader()) &&
737 "At least one edge out of the latch must go to the header");
738
// NOTE(review): missing declaration here (orig 739): the ExitBlocks vector.
740 L->getUniqueNonLatchExitBlocks(ExitBlocks);
// Violates the legacy check iff any non-latch exit is NOT deopt-terminated.
741 return any_of(ExitBlocks, [](const BasicBlock *EB) {
742 return !EB->getTerminatingDeoptimizeCall();
743 });
744}
745
 746
747// Return the number of iterations we want to peel off.
// Top-level driver: fills in PP.PeelCount/PeelLast/PeelProfiledIterations by
// combining forced counts, phi analysis, compare elimination, invariant-load
// dereferenceability and profile-estimated trip counts, in that priority.
748void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// NOTE(review): parameter lines orig 749 and 751 (TargetTransformInfo::
// PeelingPreferences &PP / ScalarEvolution &SE, const TargetTransformInfo
// &TTI per uses below) are missing from this extract.
750 unsigned TripCount, DominatorTree &DT,
752 AssumptionCache *AC, unsigned Threshold) {
753 assert(LoopSize > 0 && "Zero loop size is not allowed!");
754 // Save the PP.PeelCount value set by the target in
755 // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
756 unsigned TargetPeelCount = PP.PeelCount;
757 PP.PeelCount = 0;
758 PP.PeelLast = false;
759 if (!canPeel(L))
760 return;
761
762 // Only try to peel innermost loops by default.
763 // The constraint can be relaxed by the target in TTI.getPeelingPreferences
764 // or by the flag -unroll-allow-loop-nests-peeling.
765 if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
766 return;
767
768 // If the user provided a peel count, use that.
769 bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
770 if (UserPeelCount) {
771 LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
772 << " iterations.\n");
// NOTE(review): the assignment line (orig 773, presumably
// "PP.PeelCount = UnrollForcePeelCount;") is missing here.
774 PP.PeelProfiledIterations = true;
775 return;
776 }
777
778 // Skip peeling if it's disabled.
779 if (!PP.AllowPeeling)
780 return;
781
782 // Check that we can peel at least one iteration.
783 if (2 * LoopSize > Threshold)
784 return;
785
786 unsigned AlreadyPeeled = 0;
// NOTE(review): the metadata read guarding this assignment (orig 787,
// presumably "if (auto Peeled = getOptionalIntLoopAttribute(L,
// PeeledCountMetaData))") is missing here.
788 AlreadyPeeled = *Peeled;
789 // Stop if we already peeled off the maximum number of iterations.
790 if (AlreadyPeeled >= UnrollPeelMaxCount)
791 return;
792
793 // Pay respect to limitations implied by loop size and the max peel count.
794 unsigned MaxPeelCount = UnrollPeelMaxCount;
795 MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
796
797 // Start the max computation with the PP.PeelCount value set by the target
798 // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
799 unsigned DesiredPeelCount = TargetPeelCount;
800
801 // Here we try to get rid of Phis which become invariants or inductions after
802 // 1, 2, ..., N iterations of the loop. For this we compute the number for
803 // iterations after which every Phi is guaranteed to become an invariant or an
804 // induction, and try to peel the maximum number of iterations among these
805 // values, thus turning all those Phis into invariants or inductions.
806 if (MaxPeelCount > DesiredPeelCount) {
807 // Check how many iterations are useful for resolving Phis
808 auto NumPeels = PhiAnalyzer(*L, MaxPeelCount, EnablePeelingForIV)
809 .calculateIterationsToPeel();
810 if (NumPeels)
811 DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
812 }
813
814 const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
815 countToEliminateCompares(*L, MaxPeelCount, SE, TTI);
816 DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps);
817
818 if (DesiredPeelCount == 0)
819 DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT, AC);
820
821 if (DesiredPeelCount > 0) {
822 DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
823 // Consider max peel count limitation.
824 assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
825 if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
826 LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
827 << " iteration(s) to turn"
828 << " some Phis into invariants or inductions.\n");
829 PP.PeelCount = DesiredPeelCount;
830 PP.PeelProfiledIterations = false;
831 PP.PeelLast = false;
832 return;
833 }
834 }
835
836 if (CountToEliminateCmpsLast > 0) {
837 unsigned DesiredPeelCountLast =
838 std::min(CountToEliminateCmpsLast, MaxPeelCount);
839 // Consider max peel count limitation.
840 assert(DesiredPeelCountLast > 0 && "Wrong loop size estimation?");
841 if (DesiredPeelCountLast + AlreadyPeeled <= UnrollPeelMaxCount) {
// NOTE(review): this debug print uses DesiredPeelCount while the code below
// sets PP.PeelCount = DesiredPeelCountLast — looks like DesiredPeelCountLast
// was intended here; verify against upstream.
842 LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
843 << " iteration(s) to turn"
844 << " some Phis into invariants.\n");
845 PP.PeelCount = DesiredPeelCountLast;
846 PP.PeelProfiledIterations = false;
847 PP.PeelLast = true;
848 return;
849 }
850 }
851
852 // Bail if we know the statically calculated trip count.
853 // In this case we rather prefer partial unrolling.
854 if (TripCount)
855 return;
856
857 // Do not apply profile base peeling if it is disabled.
// NOTE(review): the guard before this early return (orig 858, presumably
// "if (!PP.PeelProfiledIterations)") is missing here.
859 return;
860 // If we don't know the trip count, but have reason to believe the average
861 // trip count is low, peeling should be beneficial, since we will usually
862 // hit the peeled section.
863 // We only do this in the presence of profile information, since otherwise
864 // our estimates of the trip count are not reliable enough.
865 if (L->getHeader()->getParent()->hasProfileData()) {
// NOTE(review): the guard before this early return (orig 866, presumably
// "if (violatesLegacyMultiExitLoopCheck(L))") is missing here.
867 return;
868 std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
869 if (!EstimatedTripCount)
870 return;
871
872 LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
873 << *EstimatedTripCount << "\n");
874
875 if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
876 unsigned PeelCount = *EstimatedTripCount;
877 LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
878 PP.PeelCount = PeelCount;
879 return;
880 }
881 LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
882 LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
883 LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
884 LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
885 LLVM_DEBUG(dbgs() << "Max peel count by cost: "
886 << (Threshold / LoopSize - 1) << "\n");
887 }
888}
889
890 /// Clones the body of the loop L, putting it between \p InsertTop and \p
891 /// InsertBot.
892 /// \param IterNumber The serial number of the iteration currently being
893 /// peeled off.
894 /// \param PeelLast Peel off the last iterations from \p L.
895 /// \param ExitEdges The exit edges of the original loop.
896 /// \param[out] NewBlocks A list of the blocks in the newly created clone
897 /// \param[out] VMap The value map between the loop and the new clone.
898 /// \param LoopBlocks A helper for DFS-traversal of the loop.
899 /// \param LVMap A value-map that maps instructions from the original loop to
900 /// instructions in the last peeled-off iteration.
// NOTE(review): this extraction dropped original line 906 of the signature
// (per the cross-reference index later in this listing it declares
// `ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,`)
// and line 1044 in the exit-phi loop near the end — verify against upstream.
901 static void cloneLoopBlocks(
902 Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
903 BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
904 SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
905 SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
907 LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
908 ScalarEvolution &SE) {
909 BasicBlock *Header = L->getHeader();
910 BasicBlock *Latch = L->getLoopLatch();
911 BasicBlock *PreHeader = L->getLoopPreheader();
912
913 Function *F = Header->getParent();
914 LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
915 LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
916 Loop *ParentLoop = L->getParentLoop();
917
918 // For each block in the original loop, create a new copy,
919 // and update the value map with the newly created values.
920 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
921 BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
922 NewBlocks.push_back(NewBB);
923
924 // If an original block is an immediate child of the loop L, its copy
925 // is a child of a ParentLoop after peeling. If a block is a child of
926 // a nested loop, it is handled in the cloneLoop() call below.
927 if (ParentLoop && LI->getLoopFor(*BB) == L)
928 ParentLoop->addBasicBlockToLoop(NewBB, *LI);
929
930 VMap[*BB] = NewBB;
931
932 // If dominator tree is available, insert nodes to represent cloned blocks.
933 if (DT) {
934 if (Header == *BB)
935 DT->addNewBlock(NewBB, InsertTop)
936 else {
937 DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
938 // VMap must contain entry for IDom, as the iteration order is RPO.
939 DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
940 }
941 }
942 }
943
944 {
945 // Identify what other metadata depends on the cloned version. After
946 // cloning, replace the metadata with the corrected version for both
947 // memory instructions and noalias intrinsics.
948 std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
949 cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
950 Header->getContext(), Ext);
951 }
952
953 // Recursively create the new Loop objects for nested loops, if any,
954 // to preserve LoopInfo.
955 for (Loop *ChildLoop : *L) {
956 cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
957 }
958
959 // Hook-up the control flow for the newly inserted blocks.
960 // The new header is hooked up directly to the "top", which is either
961 // the original loop preheader (for the first iteration) or the previous
962 // iteration's exiting block (for every other iteration)
963 InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
964
965 // Similarly, for the latch:
966 // The original exiting edge is still hooked up to the loop exit.
967 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
968 if (PeelLast) {
969 // This is the last iteration and we definitely will go to the exit. Just
970 // set both successors to InsertBot and let the branch be simplified later.
971 assert(IterNumber == 0 && "Only peeling a single iteration implemented.");
972 auto *LatchTerm = cast<BranchInst>(NewLatch->getTerminator());
973 LatchTerm->setSuccessor(0, InsertBot);
974 LatchTerm->setSuccessor(1, InsertBot);
975 } else {
976 auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator());
977 // The backedge now goes to the "bottom", which is either the loop's real
978 // header (for the last peeled iteration) or the copied header of the next
979 // iteration (for every other iteration)
980 for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) {
981 if (LatchTerm->getSuccessor(idx) == Header) {
982 LatchTerm->setSuccessor(idx, InsertBot);
983 break;
984 }
985 }
986 }
987 if (DT)
988 DT->changeImmediateDominator(InsertBot, NewLatch);
989
990 // The new copy of the loop body starts with a bunch of PHI nodes
991 // that pick an incoming value from either the preheader, or the previous
992 // loop iteration. Since this copy is no longer part of the loop, we
993 // resolve this statically:
994 if (PeelLast) {
995 // For the last iteration, we introduce new phis for each header phi in
996 // InsertTop, using the incoming value from the preheader for the original
997 // preheader (when skipping the main loop) and the incoming value from the
998 // latch for the latch (when continuing from the main loop).
999 IRBuilder<> B(InsertTop, InsertTop->getFirstNonPHIIt());
1000 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1001 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
1002 PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
1003 NewPHI->eraseFromParent();
// OrigPreHeader is only non-null when the caller inserted a trip-count
// guard that can branch directly to the peeled (last) iteration.
1004 if (OrigPreHeader)
1005 PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
1006 OrigPreHeader);
1007
1008 PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
1009 Latch);
1010 VMap[&*I] = PN;
1011 }
1012 } else {
1013 // For the first iteration, we use the value from the preheader directly.
1014 // For any other iteration, we replace the phi with the value generated by
1015 // the immediately preceding clone of the loop body (which represents
1016 // the previous iteration).
1017 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1018 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
1019 if (IterNumber == 0) {
1020 VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
1021 } else {
1022 Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
1023 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
1024 if (LatchInst && L->contains(LatchInst))
1025 VMap[&*I] = LVMap[LatchInst];
1026 else
1027 VMap[&*I] = LatchVal;
1028 }
1029 NewPHI->eraseFromParent();
1030 }
1031 }
1032
1033 // Fix up the outgoing values - we need to add a value for the iteration
1034 // we've just created. Note that this must happen *after* the incoming
1035 // values are adjusted, since the value going out of the latch may also be
1036 // a value coming into the header.
// NOTE(review): original line 1044 is missing from this listing between the
// addIncoming call and the closing brace — verify against upstream.
1037 for (auto Edge : ExitEdges)
1038 for (PHINode &PHI : Edge.second->phis()) {
1039 Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
1040 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
1041 if (LatchInst && L->contains(LatchInst))
1042 LatchVal = VMap[LatchVal];
1043 PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
1045 }
1046
1047 // LastValueMap is updated with the values for the current loop
1048 // which are used the next time this function is called.
1049 for (auto KV : VMap)
1050 LVMap[KV.first] = KV.second;
1051 }
1052
// Computes the PeelingPreferences for loop \p L by layering, in order of
// increasing priority: hard-coded defaults, target (TTI) overrides,
// command-line cl::opt overrides (when UnrollingSpecficValues is set), and
// finally explicit caller-supplied values.
// NOTE(review): this extraction dropped original lines 1054 (the rest of the
// signature, presumably `llvm::gatherPeelingPreferences(Loop *L,
// ScalarEvolution &SE, ...`), 1059 (the local `PP` declaration), and
// 1074/1076/1078 (the bodies of the three cl::opt `if`s) — verify against
// upstream LoopPeel.cpp before relying on this listing.
1053 TargetTransformInfo::PeelingPreferences
1055 const TargetTransformInfo &TTI,
1056 std::optional<bool> UserAllowPeeling,
1057 std::optional<bool> UserAllowProfileBasedPeeling,
1058 bool UnrollingSpecficValues) {
1060
1061 // Set the default values.
1062 PP.PeelCount = 0;
1063 PP.AllowPeeling = true;
1064 PP.AllowLoopNestsPeeling = false;
1065 PP.PeelLast = false;
1066 PP.PeelProfiledIterations = true;
1067
1068 // Get the target specific values.
1069 TTI.getPeelingPreferences(L, SE, PP);
1070
1071 // User specified values using cl::opt.
1072 if (UnrollingSpecficValues) {
1073 if (UnrollPeelCount.getNumOccurrences() > 0)
1075 if (UnrollAllowPeeling.getNumOccurrences() > 0)
1077 if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
1079 }
1080
1081 // User specified values provided by argument.
1082 if (UserAllowPeeling)
1083 PP.AllowPeeling = *UserAllowPeeling;
1084 if (UserAllowProfileBasedPeeling)
1085 PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
1086
1087 return PP;
1088 }
1089
1090 /// Peel off the first \p PeelCount iterations of loop \p L.
1091 ///
1092 /// Note that this does not peel them off as a single straight-line block.
1093 /// Rather, each iteration is peeled off separately, and needs to check the
1094 /// exit condition.
1095 /// For loops that dynamically execute \p PeelCount iterations or less
1096 /// this provides a benefit, since the peeled off iterations, which account
1097 /// for the bulk of dynamic execution, can be further simplified by scalar
1098 /// optimizations.
/// When \p PeelLast is set, a single iteration is instead peeled off the
/// *end* of the loop (asserted below via canPeelLastIteration + PeelCount==1).
/// Returns true (always, on this path) after updating DT, LI, SCEV, loop
/// metadata, and branch-weight-derived trip-count metadata.
// NOTE(review): this doxygen extraction dropped several original source lines
// (1100, 1114, 1145, 1209, 1312, 1378, 1390, 1393, 1426) — e.g. parts of the
// signature, the ExitEdges/ExitValues/NewBlocks declarations, and the
// setLoopProbability / metadata-update call heads. Verify against upstream
// LoopPeel.cpp; code lines below are kept byte-identical.
1099 bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1101 bool PreserveLCSSA, ValueToValueMapTy &LVMap) {
1102 assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
1103 assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
1104 assert((!PeelLast || (canPeelLastIteration(*L, *SE) && PeelCount == 1)) &&
1105 "when peeling the last iteration, the loop must be supported and can "
1106 "only peel a single iteration");
1107
// DFS of the loop body in RPO; cloneLoopBlocks relies on this order so that
// each cloned block's idom is already in the value map.
1108 LoopBlocksDFS LoopBlocks(L);
1109 LoopBlocks.perform(LI);
1110
1111 BasicBlock *Header = L->getHeader();
1112 BasicBlock *PreHeader = L->getLoopPreheader();
1113 BasicBlock *Latch = L->getLoopLatch();
1115 L->getExitEdges(ExitEdges);
1116
1117 // Remember dominators of blocks we might reach through exits to change them
1118 // later. Immediate dominator of such block might change, because we add more
1119 // routes which can lead to the exit: we can reach it from the peeled
1120 // iterations too.
1121 DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
1122 for (auto *BB : L->blocks()) {
1123 auto *BBDomNode = DT.getNode(BB);
1124 SmallVector<BasicBlock *, 16> ChildrenToUpdate;
1125 for (auto *ChildDomNode : BBDomNode->children()) {
1126 auto *ChildBB = ChildDomNode->getBlock();
1127 if (!L->contains(ChildBB))
1128 ChildrenToUpdate.push_back(ChildBB);
1129 }
1130 // The new idom of the block will be the nearest common dominator
1131 // of all copies of the previous idom. This is equivalent to the
1132 // nearest common dominator of the previous idom and the first latch,
1133 // which dominates all copies of the previous idom.
1134 BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
1135 for (auto *ChildBB : ChildrenToUpdate)
1136 NonLoopBlocksIDom[ChildBB] = NewIDom;
1137 }
1138
1139 Function *F = Header->getParent();
1140
1141 // Set up all the necessary basic blocks.
1142 BasicBlock *InsertTop;
1143 BasicBlock *InsertBot;
1144 BasicBlock *NewPreHeader = nullptr;
1146 if (PeelLast) {
1147 // It is convenient to split the single exit block from the latch the
1148 // into 3 parts - two blocks to anchor the peeled copy of the loop body,
1149 // and a new final exit block.
1150
1151 // Peeling the last iteration transforms.
1152 //
1153 // PreHeader:
1154 // ...
1155 // Header:
1156 // LoopBody
1157 // If (cond) goto Header
1158 // Exit:
1159 //
1160 // into
1161 //
1162 // Header:
1163 // LoopBody
1164 // If (cond) goto Header
1165 // InsertTop:
1166 // LoopBody
1167 // If (!cond) goto InsertBot
1168 // InsertBot:
1169 // Exit:
1170 // ...
// Record the values the exit phis receive from the latch; after peeling,
// users of the exit phis are rewritten to the peeled iteration's values.
1171 BasicBlock *Exit = L->getExitBlock();
1172 for (PHINode &P : Exit->phis())
1173 ExitValues[&P] = P.getIncomingValueForBlock(Latch);
1174
1175 const SCEV *BTC = SE->getBackedgeTakenCount(L);
1176
1177 InsertTop = SplitEdge(Latch, Exit, &DT, LI);
1178 InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
1179
1180 InsertTop->setName(Exit->getName() + ".peel.begin");
1181 InsertBot->setName(Exit->getName() + ".peel.next");
1182 NewPreHeader = nullptr;
1183
1184 // If the original loop may only execute a single iteration we need to
1185 // insert a trip count check and skip the original loop with the last
1186 // iteration peeled off if necessary. Either way, we must update branch
1187 // weights to maintain the loop body frequency.
1188 if (SE->isKnownNonZero(BTC)) {
1189 // We have just proven that, when reached, the original loop always
1190 // executes at least two iterations. Thus, we unconditionally execute
1191 // both the remaining loop's initial iteration and the peeled iteration.
1192 // But that increases the latter's frequency above its frequency in the
1193 // original loop. To maintain the total frequency, we compensate by
1194 // decreasing the remaining loop body's frequency to indicate one less
1195 // iteration.
1196 //
1197 // We use this formula to convert probability to/from frequency:
1198 // Sum(i=0..inf)(P^i) = 1/(1-P) = Freq.
1199 if (BranchProbability P = getLoopProbability(L); !P.isUnknown()) {
1200 // Trying to subtract one from an infinite loop is pointless, and our
1201 // formulas then produce division by zero, so skip that case.
1202 if (BranchProbability ExitP = P.getCompl(); !ExitP.isZero()) {
1203 double Freq = 1 / ExitP.toDouble();
1204 // No branch weights can produce a frequency of less than one given
1205 // the initial iteration, and our formulas produce a negative
1206 // probability if we try.
1207 assert(Freq >= 1.0 && "expected freq >= 1 due to initial iteration");
1208 double NewFreq = std::max(Freq - 1, 1.0);
1210 L, BranchProbability::getBranchProbability(1 - 1 / NewFreq));
1211 }
1212 }
1213 } else {
// The loop might execute only one iteration: materialize the backedge-taken
// count and guard entry so BTC==0 branches straight to the peeled iteration.
1214 NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
1215 SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");
1216
1217 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
1218 Value *BTCValue =
1219 Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
1220 IRBuilder<> B(PreHeaderBR);
1221 Value *Cond =
1222 B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
1223 auto *BI = B.CreateCondBr(Cond, NewPreHeader, InsertTop);
1224 SmallVector<uint32_t> Weights;
1225 auto *OrigLatchBr = Latch->getTerminator();
1226 auto HasBranchWeights = !ProfcheckDisableMetadataFixes &&
1227 extractBranchWeights(*OrigLatchBr, Weights);
1228 if (HasBranchWeights) {
1229 // The probability that the new guard skips the loop to execute just one
1230 // iteration is the original loop's probability of exiting at the latch
1231 // after any iteration. That should maintain the original loop body
1232 // frequency. Upon arriving at the loop, due to the guard, the
1233 // probability of reaching iteration i of the new loop is the
1234 // probability of reaching iteration i+1 of the original loop. The
1235 // probability of reaching the peeled iteration is 1, which is the
1236 // probability of reaching iteration 0 of the original loop.
1237 if (L->getExitBlock() == OrigLatchBr->getSuccessor(0))
1238 std::swap(Weights[0], Weights[1]);
1239 setBranchWeights(*BI, Weights, /*IsExpected=*/false);
1240 }
1241 PreHeaderBR->eraseFromParent();
1242
1243 // PreHeader now dominates InsertTop.
1244 DT.changeImmediateDominator(InsertTop, PreHeader);
1245 }
1246 } else {
1247 // It is convenient to split the preheader into 3 parts - two blocks to
1248 // anchor the peeled copy of the loop body, and a new preheader for the
1249 // "real" loop.
1250
1251 // Peeling the first iteration transforms.
1252 //
1253 // PreHeader:
1254 // ...
1255 // Header:
1256 // LoopBody
1257 // If (cond) goto Header
1258 // Exit:
1259 //
1260 // into
1261 //
1262 // InsertTop:
1263 // LoopBody
1264 // If (!cond) goto Exit
1265 // InsertBot:
1266 // NewPreHeader:
1267 // ...
1268 // Header:
1269 // LoopBody
1270 // If (cond) goto Header
1271 // Exit:
1272 //
1273 // Each following iteration will split the current bottom anchor in two,
1274 // and put the new copy of the loop body between these two blocks. That
1275 // is, after peeling another iteration from the example above, we'll
1276 // split InsertBot, and get:
1277 //
1278 // InsertTop:
1279 // LoopBody
1280 // If (!cond) goto Exit
1281 // InsertBot:
1282 // LoopBody
1283 // If (!cond) goto Exit
1284 // InsertBot.next:
1285 // NewPreHeader:
1286 // ...
1287 // Header:
1288 // LoopBody
1289 // If (cond) goto Header
1290 // Exit:
1291 //
1292 InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
1293 InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
1294 NewPreHeader = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
1295
1296 InsertTop->setName(Header->getName() + ".peel.begin");
1297 InsertBot->setName(Header->getName() + ".peel.next");
1298 NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
1299 }
1300
1301 Instruction *LatchTerm =
1302 cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());
1303
1304 // Identify what noalias metadata is inside the loop: if it is inside the
1305 // loop, the associated metadata must be cloned for each iteration.
1306 SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
1307 identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
1308
1309 // For each peeled-off iteration, make a copy of the loop.
1310 ValueToValueMapTy VMap;
1311 for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
1313
1314 cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
1315 NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
1316 LoopBlocks, VMap, LVMap, &DT, LI,
1317 LoopLocalNoAliasDeclScopes, *SE);
1318
1319 // Remap to use values from the current iteration instead of the
1320 // previous one.
1321 remapInstructionsInBlocks(NewBlocks, VMap);
1322
1323 if (Iter == 0) {
1324 if (PeelLast) {
1325 // Adjust the exit condition so the loop exits one iteration early.
1326 // For now we simply subtract one from the second operand of the
1327 // exit condition. This relies on the peel count computation to
1328 // check that this is actually legal. In particular, it ensures that
1329 // the first operand of the compare is an AddRec with step 1 and we
1330 // execute more than one iteration.
1331 auto *Cmp =
1332 cast<ICmpInst>(L->getLoopLatch()->getTerminator()->getOperand(0));
1333 IRBuilder B(Cmp);
1334 Cmp->setOperand(
1335 1, B.CreateSub(Cmp->getOperand(1),
1336 ConstantInt::get(Cmp->getOperand(1)->getType(), 1)));
1337 } else {
1338 // Update IDoms of the blocks reachable through exits.
1339 for (auto BBIDom : NonLoopBlocksIDom)
1340 DT.changeImmediateDominator(BBIDom.first,
1341 cast<BasicBlock>(LVMap[BBIDom.second]));
1342 }
1343 }
1344
1345#ifdef EXPENSIVE_CHECKS
1346 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1347#endif
1348
1349 // Remove Loop metadata from the latch branch instruction
1350 // because it is not the Loop's latch branch anymore.
1351 auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
1352 LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr);
1353
// Advance the anchors: the bottom anchor becomes the top anchor of the next
// peeled iteration, and a fresh bottom anchor is split off.
1354 InsertTop = InsertBot;
1355 InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
1356 InsertBot->setName(Header->getName() + ".peel.next");
1357
// Move the cloned blocks to the end of the function so they appear in
// peeling order in the block list.
1358 F->splice(InsertTop->getIterator(), F, NewBlocks[0]->getIterator(),
1359 F->end());
1360 }
1361
1362 if (PeelLast) {
1363 // Now adjust users of the original exit values by replacing them with the
1364 // exit value from the peeled iteration and remove them.
1365 for (const auto &[P, E] : ExitValues) {
1366 Instruction *ExitInst = dyn_cast<Instruction>(E);
1367 if (ExitInst && L->contains(ExitInst))
1368 P->replaceAllUsesWith(&*VMap[ExitInst]);
1369 else
1370 P->replaceAllUsesWith(E);
1371 P->eraseFromParent();
1372 }
1373 formLCSSA(*L, DT, LI, SE);
1374 } else {
1375 // Now adjust the phi nodes in the loop header to get their initial values
1376 // from the last peeled-off iteration instead of the preheader.
1377 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1379 Value *NewVal = PHI->getIncomingValueForBlock(Latch);
1380 Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
1381 if (LatchInst && L->contains(LatchInst))
1382 NewVal = LVMap[LatchInst];
1383
1384 PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
1385 }
1386 }
1387
1388 // Update Metadata for count of peeled off iterations.
1389 unsigned AlreadyPeeled = 0;
1391 AlreadyPeeled = *Peeled;
1392 unsigned TotalPeeled = AlreadyPeeled + PeelCount;
1394
1395 // Update metadata for the estimated trip count. The original branch weight
1396 // metadata is already correct for both the remaining loop and the peeled loop
1397 // iterations, so do not adjust it.
1398 //
1399 // For example, consider what happens when peeling 2 iterations from a loop
1400 // with an estimated trip count of 10 and inserting them before the remaining
1401 // loop. Each of the peeled iterations and each iteration in the remaining
1402 // loop still has the same probability of exiting the *entire original* loop
1403 // as it did when in the original loop, and thus it should still have the same
1404 // branch weights. The peeled iterations' non-zero probabilities of exiting
1405 // already appropriately reduce the probability of reaching the remaining
1406 // iterations just as they did in the original loop. Trying to also adjust
1407 // the remaining loop's branch weights to reflect its new trip count of 8 will
1408 // erroneously further reduce its block frequencies. However, in case an
1409 // analysis later needs to determine the trip count of the remaining loop
1410 // while examining it in isolation without considering the probability of
1411 // actually reaching it, we store the new trip count as separate metadata.
1412 if (auto EstimatedTripCount = getLoopEstimatedTripCount(L)) {
1413 unsigned EstimatedTripCountNew = *EstimatedTripCount;
1414 if (EstimatedTripCountNew < TotalPeeled)
1415 EstimatedTripCountNew = 0;
1416 else
1417 EstimatedTripCountNew -= TotalPeeled;
1418 setLoopEstimatedTripCount(L, EstimatedTripCountNew);
1419 }
1420
1421 if (Loop *ParentLoop = L->getParentLoop())
1422 L = ParentLoop;
1423
1424 // We modified the loop, update SE.
1425 SE->forgetTopmostLoop(L);
1427
1428#ifdef EXPENSIVE_CHECKS
1429 // Finally DomTree must be correct.
1430 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1431#endif
1432
1433 // FIXME: Incrementally update loop-simplify
1434 simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
1435
1436 NumPeeled++;
1437 NumPeeledEnd += PeelLast;
1438
1439 return true;
1440 }
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseMap class.
static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, const SCEVAddRecExpr *LeftAR, const SCEV *RightSCEV, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Returns true if the last iteration can be peeled off and the condition (Pred LeftAR,...
Definition LoopPeel.cpp:507
static bool violatesLegacyMultiExitLoopCheck(Loop *L)
This "heuristic" exactly matches implicit behavior which used to exist inside getLoopEstimatedTripCou...
Definition LoopPeel.cpp:726
static std::pair< unsigned, unsigned > countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Definition LoopPeel.cpp:545
static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, DominatorTree &DT, AssumptionCache *AC)
Definition LoopPeel.cpp:422
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *OrigPreHeader, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &ExitEdges, SmallVectorImpl< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI, ArrayRef< MDNode * > LoopLocalNoAliasDeclScopes, ScalarEvolution &SE)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
Definition LoopPeel.cpp:901
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define P(N)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Store the result of a depth first search within basic blocks contained by a single loop.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
This class represents min/max intrinsics.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This node represents a polynomial recurrence on the trip count of the specified loop.
LLVM_ABI const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents a constant integer value.
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)
Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be ...
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
LLVM_ABI std::optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI std::optional< bool > evaluatePredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Check whether the condition described by Pred, LHS, and RHS is true or false.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This class represents the LLVM 'select' instruction.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
self_iterator getIterator()
Definition ilist_node.h:123
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
cst_pred_ty< is_one > m_scev_One()
Match an integer 1.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Return either:
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
static cl::opt< bool > DisableAdvancedPeeling("disable-advanced-peeling", cl::init(false), cl::Hidden, cl::desc("Disable advance peeling. Issues for convergent targets (D134803)."))
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @llvm.experimental.deoptimize call prior to its terminating return instruction or is terminated by unreachable.
static cl::opt< bool > EnablePeelingForIV("enable-peeling-for-iv", cl::init(false), cl::Hidden, cl::desc("Enable peeling to convert Phi nodes into IVs"))
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular function.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool canPeel(const Loop *L)
Definition LoopPeel.cpp:95
bool canPeelLastIteration(const Loop &L, ScalarEvolution &SE)
Returns true if the last iteration of L can be peeled off.
Definition LoopPeel.cpp:476
LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instruction I.
static const char * PeeledCountMetaData
Definition LoopPeel.cpp:89
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
Definition LoopPeel.cpp:748
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
BranchProbability getLoopProbability(Loop *L)
Based on branch weight metadata, return either:
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
bool setLoopProbability(Loop *L, BranchProbability P)
Set branch weight metadata for the latch of L to indicate that, at the end of any iteration,...
TargetTransformInfo TTI
static cl::opt< bool > UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled."))
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition LCSSA.cpp:427
bool peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peeled-off iteration.
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
LLVM_ABI Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelLast
Peel off the last PeelCount loop iterations.
bool PeelProfiledIterations
Allow peeling based on profile.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before the loop body is executed).