LLVM 23.0.0git
LoopPeel.cpp
Go to the documentation of this file.
1//===- LoopPeel.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loop Peeling Utilities.
10//===----------------------------------------------------------------------===//
11
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/MapVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/Analysis/Loads.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/InstrTypes.h"
28#include "llvm/IR/Instruction.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/IR/MDBuilder.h"
37#include "llvm/Support/Debug.h"
45#include <algorithm>
46#include <cassert>
47#include <cstdint>
48#include <optional>
49
50using namespace llvm;
51using namespace llvm::PatternMatch;
52using namespace llvm::SCEVPatternMatch;
53
54#define DEBUG_TYPE "loop-peel"
55
56STATISTIC(NumPeeled, "Number of loops peeled");
57STATISTIC(NumPeeledEnd, "Number of loops peeled from end");
58
59namespace llvm {
61 "unroll-peel-count", cl::Hidden,
62 cl::desc("Set the unroll peeling count, for testing purposes"));
63
64static cl::opt<bool>
65 UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
66 cl::desc("Allows loops to be peeled when the dynamic "
67 "trip count is known to be low."));
68
69static cl::opt<bool>
70 UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
71 cl::init(false), cl::Hidden,
72 cl::desc("Allows loop nests to be peeled."));
73
75 "unroll-peel-max-count", cl::init(7), cl::Hidden,
76 cl::desc("Max average trip count which will cause loop peeling."));
77
79 "unroll-force-peel-count", cl::init(0), cl::Hidden,
80 cl::desc("Force a peel count regardless of profiling information."));
81
83 "disable-advanced-peeling", cl::init(false), cl::Hidden,
85 "Disable advance peeling. Issues for convergent targets (D134803)."));
86
88 "enable-peeling-for-iv", cl::init(false), cl::Hidden,
89 cl::desc("Enable peeling to convert Phi nodes into IVs"));
90
91static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
92
94} // namespace llvm
95
96// Check whether we are capable of peeling this loop.
97bool llvm::canPeel(const Loop *L) {
98 // Make sure the loop is in simplified form
99 if (!L->isLoopSimplifyForm())
100 return false;
102 return true;
103
105 L->getUniqueNonLatchExitBlocks(Exits);
106 // The latch must either be the only exiting block or all non-latch exit
107 // blocks have either a deopt or unreachable terminator or compose a chain of
108 // blocks where the last one is either deopt or unreachable terminated. Both
109 // deopt and unreachable terminators are a strong indication they are not
110 // taken. Note that this is a profitability check, not a legality check. Also
111 // note that LoopPeeling currently can only update the branch weights of latch
112 // blocks and branch weights to blocks with deopt or unreachable do not need
113 // updating.
115}
116
117namespace {
118
119// As a loop is peeled, it may be the case that Phi nodes become
120// loop-invariant (ie, known because there is only one choice).
121// For example, consider the following function:
122// void g(int);
123// void binary() {
124// int x = 0;
125// int y = 0;
126// int a = 0;
127// for(int i = 0; i <100000; ++i) {
128// g(x);
129// x = y;
130// g(a);
131// y = a + 1;
132// a = 5;
133// }
134// }
135// Peeling 3 iterations is beneficial because the values for x, y and a
136// become known. The IR for this loop looks something like the following:
137//
138// %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
139// %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
140// %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
141// %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
142// ...
143// tail call void @_Z1gi(i32 signext %x)
144// tail call void @_Z1gi(i32 signext %a)
145// %add = add nuw nsw i32 %a, 1
146// %inc = add nuw nsw i32 %i, 1
147// %exitcond = icmp eq i32 %inc, 100000
148// br i1 %exitcond, label %for.cond.cleanup, label %for.body
149//
150// The arguments for the calls to g will become known after 3 iterations
151// of the loop, because the phi nodes values become known after 3 iterations
152// of the loop (ie, they are known on the 4th iteration, so peel 3 iterations).
153// The first iteration has g(0), g(0); the second has g(0), g(5); the
154// third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
155// Now consider the phi nodes:
156// %a is a phi with constants so it is determined after iteration 1.
157// %y is a phi based on a constant and %a so it is determined on
158// the iteration after %a is determined, so iteration 2.
159// %x is a phi based on a constant and %y so it is determined on
160// the iteration after %y, so iteration 3.
161// %i is based on itself (and is an induction variable) so it is
162// never determined.
163// This means that peeling off 3 iterations will result in being able to
164// remove the phi nodes for %a, %y, and %x. The arguments for the
165// corresponding calls to g are determined and the code for computing
166// x, y, and a can be removed.
167//
168// Similarly, there are cases where peeling makes Phi nodes loop-inductions
169// (i.e., the value is increased or decreased by a fixed amount on every
170// iteration). For example, consider the following function.
171//
172// #define N 100
173// void f(int a[], int b[]) {
174// int im = N - 1;
175// for (int i = 0; i < N; i++) {
176// a[i] = b[i] + b[im];
177// im = i;
178// }
179// }
180//
181// The IR of the loop will look something like the following.
182//
183// %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
184// %im = phi i32 [ 99, %entry ], [ %i, %for.body ]
185// ...
186// %i.next = add nuw nsw i32 %i, 1
187// ...
188//
189// In this case, %im becomes a loop-induction variable by peeling 1 iteration,
190// because %i is a loop-induction one. The peeling count can be determined by
191// the same algorithm with loop-invariant case. Such peeling is profitable for
192// loop-vectorization.
193//
194// The PhiAnalyzer class calculates how many times a loop should be
195// peeled based on the above analysis of the phi nodes in the loop while
196// respecting the maximum specified.
197class PhiAnalyzer {
198public:
199 PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV);
200
201 // Calculate the sufficient minimum number of iterations of the loop to peel
202 // such that phi instructions become determined (subject to allowable limits)
203 std::optional<unsigned> calculateIterationsToPeel();
204
205protected:
206 enum class PeelCounterType {
207 Invariant,
208 Induction,
209 };
210
211 using PeelCounterValue = std::pair<unsigned, PeelCounterType>;
212 using PeelCounter = std::optional<PeelCounterValue>;
213 const PeelCounter Unknown = std::nullopt;
214
215 // Add 1 respecting Unknown and return Unknown if result over MaxIterations
216 PeelCounter addOne(PeelCounter PC) const {
217 if (PC == Unknown)
218 return Unknown;
219 auto [Val, Ty] = *PC;
220 return (Val + 1 <= MaxIterations) ? PeelCounter({Val + 1, Ty}) : Unknown;
221 }
222
223 // Return a value representing zero for the given counter type.
224 PeelCounter makeZero(PeelCounterType Ty) const {
225 return PeelCounter({0, Ty});
226 }
227
228 // Calculate the number of iterations after which the given value becomes an
229 // invariant or an induction.
230 PeelCounter calculate(const Value &);
231
232 // Auxiliary function to calculate the number of iterations for a comparison
233 // instruction or a binary operator.
234 PeelCounter mergeTwoCounters(const Instruction &CmpOrBinaryOp,
235 const PeelCounterValue &LHS,
236 const PeelCounterValue &RHS) const;
237
238 // Returns true if the \p Phi is an induction in the target loop. This is a
239 // lightweight check and possible to detect an IV in some cases.
240 bool isInductionPHI(const PHINode *Phi) const;
241
242 const Loop &L;
243 const unsigned MaxIterations;
244 const bool PeelForIV;
245
246 // Map of Values to number of iterations to invariance or induction
247 SmallDenseMap<const Value *, PeelCounter> IterationsToInvarianceOrInduction;
248};
249
250PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV)
251 : L(L), MaxIterations(MaxIterations), PeelForIV(PeelForIV) {
252 assert(canPeel(&L) && "loop is not suitable for peeling");
253 assert(MaxIterations > 0 && "no peeling is allowed?");
254}
255
256/// Test whether \p Phi is an induction variable. Although this can be
257/// determined using SCEV analysis, it is expensive to compute here. Instead,
258/// we perform cheaper checks that may not detect complex cases but are
259/// sufficient for some situations.
260bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
261 // Currently we only support a loop that has single latch.
262 BasicBlock *Latch = L.getLoopLatch();
263 if (Latch == nullptr)
264 return false;
265
266 Value *Cur = Phi->getIncomingValueForBlock(Latch);
267 SmallPtrSet<Value *, 4> Visited;
268 bool VisitBinOp = false;
269
270 // Starting from the incoming value of the Phi, we follow the use-def chain.
271 // We consider Phi to be an IV if we can reach it again by traversing only
272 // add, sub, or cast instructions.
273 while (true) {
274 if (Cur == Phi)
275 break;
276
277 // Avoid infinite loop.
278 if (!Visited.insert(Cur).second)
279 return false;
280
281 auto *I = dyn_cast<Instruction>(Cur);
282 if (!I || !L.contains(I))
283 return false;
284
285 if (auto *Cast = dyn_cast<CastInst>(I)) {
286 Cur = Cast->getOperand(0);
287 } else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
288 if (BinOp->getOpcode() != Instruction::Add &&
289 BinOp->getOpcode() != Instruction::Sub)
290 return false;
291 if (!isa<ConstantInt>(BinOp->getOperand(1)))
292 return false;
293
294 VisitBinOp = true;
295 Cur = BinOp->getOperand(0);
296 } else {
297 return false;
298 }
299 }
300
301 // Ignore cases where no binary operations are visited.
302 return VisitBinOp;
303}
304
305/// When either \p LHS or \p RHS is an IV, the result of \p CmpOrBinaryOp is
306/// considered an IV only if it is an addition or a subtraction. Otherwise the
307/// result can be a value that is neither a loop-invariant nor an IV.
308///
309/// If both \p LHS and \p RHS are loop-invariants, then the result of
310/// \CmpOrBinaryOp is also a loop-invariant.
311PhiAnalyzer::PeelCounter
312PhiAnalyzer::mergeTwoCounters(const Instruction &CmpOrBinaryOp,
313 const PeelCounterValue &LHS,
314 const PeelCounterValue &RHS) const {
315 auto &[LVal, LTy] = LHS;
316 auto &[RVal, RTy] = RHS;
317 unsigned NewVal = std::max(LVal, RVal);
318
319 if (LTy == PeelCounterType::Induction || RTy == PeelCounterType::Induction) {
320 if (const auto *BinOp = dyn_cast<BinaryOperator>(&CmpOrBinaryOp)) {
321 if (BinOp->getOpcode() == Instruction::Add ||
322 BinOp->getOpcode() == Instruction::Sub)
323 return PeelCounter({NewVal, PeelCounterType::Induction});
324 }
325 return Unknown;
326 }
327 return PeelCounter({NewVal, PeelCounterType::Invariant});
328}
329
330// This function calculates the number of iterations after which the value
331// becomes an invariant. The pre-calculated values are memorized in a map.
332// N.B. This number will be Unknown or <= MaxIterations.
333// The function is calculated according to the following definition:
334// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
335// F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown)
336// G(%y) = 0 if %y is a loop invariant
337// G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
338// G(%y) = TODO: if %y is an expression based on phis and loop invariants
339// The example looks like:
340// %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration.
341// %y = phi(0, 5)
342// %a = %y + 1
343// G(%y) = Unknown otherwise (including phi not in header block)
344PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
345 // If we already know the answer, take it from the map.
346 // Otherwise, place Unknown to map to avoid infinite recursion. Such
347 // cycles can never stop on an invariant.
348 auto [I, Inserted] =
349 IterationsToInvarianceOrInduction.try_emplace(&V, Unknown);
350 if (!Inserted)
351 return I->second;
352
353 if (L.isLoopInvariant(&V))
354 // Loop invariant so known at start.
355 return (IterationsToInvarianceOrInduction[&V] =
356 makeZero(PeelCounterType::Invariant));
357 if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
358 if (Phi->getParent() != L.getHeader()) {
359 // Phi is not in header block so Unknown.
360 assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
361 "unexpected value saved");
362 return Unknown;
363 }
364
365 // If Phi is an induction, register it as a starting point.
366 if (PeelForIV && isInductionPHI(Phi))
367 return (IterationsToInvarianceOrInduction[&V] =
368 makeZero(PeelCounterType::Induction));
369
370 // We need to analyze the input from the back edge and add 1.
371 Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
372 PeelCounter Iterations = calculate(*Input);
373 assert(IterationsToInvarianceOrInduction[Input] == Iterations &&
374 "unexpected value saved");
375 return (IterationsToInvarianceOrInduction[Phi] = addOne(Iterations));
376 }
377 if (const Instruction *I = dyn_cast<Instruction>(&V)) {
378 if (isa<CmpInst>(I) || I->isBinaryOp()) {
379 // Binary instructions get the max of the operands.
380 PeelCounter LHS = calculate(*I->getOperand(0));
381 if (LHS == Unknown)
382 return Unknown;
383 PeelCounter RHS = calculate(*I->getOperand(1));
384 if (RHS == Unknown)
385 return Unknown;
386 return (IterationsToInvarianceOrInduction[I] =
387 mergeTwoCounters(*I, *LHS, *RHS));
388 }
389 if (I->isCast())
390 // Cast instructions get the value of the operand.
391 return (IterationsToInvarianceOrInduction[I] =
392 calculate(*I->getOperand(0)));
393 }
394 // TODO: handle more expressions
395
396 // Everything else is Unknown.
397 assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
398 "unexpected value saved");
399 return Unknown;
400}
401
402std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
403 unsigned Iterations = 0;
404 for (auto &PHI : L.getHeader()->phis()) {
405 PeelCounter ToInvarianceOrInduction = calculate(PHI);
406 if (ToInvarianceOrInduction != Unknown) {
407 unsigned Val = ToInvarianceOrInduction->first;
408 assert(Val <= MaxIterations && "bad result in phi analysis");
409 Iterations = std::max(Iterations, Val);
410 if (Iterations == MaxIterations)
411 break;
412 }
413 }
414 assert((Iterations <= MaxIterations) && "bad result in phi analysis");
415 return Iterations ? std::optional<unsigned>(Iterations) : std::nullopt;
416}
417
418} // unnamed namespace
419
420// Try to find any invariant memory reads that will become dereferenceable in
421// the remainder loop after peeling. The load must also be used (transitively)
422// by an exit condition. Returns the number of iterations to peel off (at the
423// moment either 0 or 1).
425 DominatorTree &DT,
426 AssumptionCache *AC) {
427 // Skip loops with a single exiting block, because there should be no benefit
428 // for the heuristic below.
429 if (L.getExitingBlock())
430 return 0;
431
432 // All non-latch exit blocks must have an UnreachableInst terminator.
433 // Otherwise the heuristic below may not be profitable.
435 L.getUniqueNonLatchExitBlocks(Exits);
436 if (any_of(Exits, [](const BasicBlock *BB) {
437 return !isa<UnreachableInst>(BB->getTerminator());
438 }))
439 return 0;
440
441 // Now look for invariant loads that dominate the latch and are not known to
442 // be dereferenceable. If there are such loads and no writes, they will become
443 // dereferenceable in the loop if the first iteration is peeled off. Also
444 // collect the set of instructions controlled by such loads. Only peel if an
445 // exit condition uses (transitively) such a load.
446 BasicBlock *Header = L.getHeader();
447 BasicBlock *Latch = L.getLoopLatch();
448 SmallPtrSet<Value *, 8> LoadUsers;
449 const DataLayout &DL = L.getHeader()->getDataLayout();
450 for (BasicBlock *BB : L.blocks()) {
451 for (Instruction &I : *BB) {
452 // Calls that only access inaccessible memory can never alias with loads.
453 if (I.mayWriteToMemory() &&
454 !(isa<CallBase>(I) &&
455 cast<CallBase>(I).onlyAccessesInaccessibleMemory()))
456 return 0;
457
458 if (LoadUsers.contains(&I))
459 LoadUsers.insert_range(I.users());
460 // Do not look for reads in the header; they can already be hoisted
461 // without peeling.
462 if (BB == Header)
463 continue;
464 if (auto *LI = dyn_cast<LoadInst>(&I)) {
465 Value *Ptr = LI->getPointerOperand();
466 if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
468 SimplifyQuery(DL, &DT, AC, LI)))
469 LoadUsers.insert_range(I.users());
470 }
471 }
472 }
473 SmallVector<BasicBlock *> ExitingBlocks;
474 L.getExitingBlocks(ExitingBlocks);
475 if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
476 return LoadUsers.contains(Exiting->getTerminator());
477 }))
478 return 1;
479 return 0;
480}
481
483 const SCEV *BTC = SE.getBackedgeTakenCount(&L);
485 return false;
486
487 // Check if the exit condition of the loop can be adjusted by the peeling
488 // codegen. For now, it must
489 // * exit via the latch,
490 // * the exit condition must be a NE/EQ compare of an induction with step
491 // of 1 and must only be used by the exiting branch.
492 BasicBlock *Latch = L.getLoopLatch();
493 Value *Inc;
494 Value *Bound;
495 CmpPredicate Pred;
496 BasicBlock *Succ1;
497 BasicBlock *Succ2;
498 return Latch && Latch == L.getExitingBlock() &&
499 match(Latch->getTerminator(),
500 m_Br(m_OneUse(m_ICmp(Pred, m_Value(Inc), m_Value(Bound))),
501 m_BasicBlock(Succ1), m_BasicBlock(Succ2))) &&
502 ((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader()) ||
503 (Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader())) &&
504 Bound->getType()->isIntegerTy() &&
505 SE.isLoopInvariant(SE.getSCEV(Bound), &L) &&
506 match(SE.getSCEV(Inc),
508}
509
510/// Returns true if the last iteration can be peeled off and the condition (Pred
511/// LeftAR, RightSCEV) is known at the last iteration and the inverse condition
512/// is known at the second-to-last.
514 const SCEVAddRecExpr *LeftAR,
515 const SCEV *RightSCEV, ScalarEvolution &SE,
516 const TargetTransformInfo &TTI) {
517 if (!canPeelLastIteration(L, SE))
518 return false;
519
520 const SCEV *BTC = SE.getBackedgeTakenCount(&L);
521 SCEVExpander Expander(SE, "loop-peel");
522 if (!SE.isKnownNonZero(BTC) &&
524 L.getLoopPredecessor()->getTerminator()))
525 return false;
526
527 auto Guards = ScalarEvolution::LoopGuards::collect(&L, SE);
528 BTC = SE.applyLoopGuards(BTC, Guards);
529 RightSCEV = SE.applyLoopGuards(RightSCEV, Guards);
530 const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
531 const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
532 SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
533
534 return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter,
535 RightSCEV) &&
536 SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV);
537}
538
539// Return the number of iterations to peel off from the beginning and end of the
540// loop respectively, that make conditions in the body true/false. For example,
541// if we peel 2 iterations off the loop below, the condition i < 2 can be
542// evaluated at compile time.
543//
544// for (i = 0; i < n; i++)
545// if (i < 2)
546// ..
547// else
548// ..
549// }
550static std::pair<unsigned, unsigned>
551countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
552 const TargetTransformInfo &TTI) {
553 assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
554 unsigned DesiredPeelCount = 0;
555 unsigned DesiredPeelCountLast = 0;
556
557 // Do not peel the entire loop.
558 const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(&L);
559 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(BE))
560 MaxPeelCount =
561 std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount);
562
563 // Increase PeelCount while (IterVal Pred BoundSCEV) condition is satisfied;
564 // return true if inversed condition become known before reaching the
565 // MaxPeelCount limit.
566 auto PeelWhilePredicateIsKnown =
567 [&](unsigned &PeelCount, const SCEV *&IterVal, const SCEV *BoundSCEV,
568 const SCEV *Step, ICmpInst::Predicate Pred) {
569 while (PeelCount < MaxPeelCount &&
570 SE.isKnownPredicate(Pred, IterVal, BoundSCEV)) {
571 IterVal = SE.getAddExpr(IterVal, Step);
572 ++PeelCount;
573 }
574 return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
575 BoundSCEV);
576 };
577
578 const unsigned MaxDepth = 4;
579 std::function<void(Value *, unsigned)> ComputePeelCount =
580 [&](Value *Condition, unsigned Depth) -> void {
581 if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth)
582 return;
583
584 Value *LeftVal, *RightVal;
585 if (match(Condition, m_And(m_Value(LeftVal), m_Value(RightVal))) ||
586 match(Condition, m_Or(m_Value(LeftVal), m_Value(RightVal)))) {
587 ComputePeelCount(LeftVal, Depth + 1);
588 ComputePeelCount(RightVal, Depth + 1);
589 return;
590 }
591
592 CmpPredicate Pred;
593 if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
594 return;
595
596 const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
597 const SCEV *RightSCEV = SE.getSCEV(RightVal);
598
599 // Do not consider predicates that are known to be true or false
600 // independently of the loop iteration.
601 if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
602 return;
603
604 // Check if we have a condition with one AddRec and one non AddRec
605 // expression. Normalize LeftSCEV to be the AddRec.
606 if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
607 if (isa<SCEVAddRecExpr>(RightSCEV)) {
608 std::swap(LeftSCEV, RightSCEV);
610 } else
611 return;
612 }
613
614 const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
615
616 // Avoid huge SCEV computations in the loop below, make sure we only
617 // consider AddRecs of the loop we are trying to peel.
618 if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
619 return;
620 if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
621 !SE.getMonotonicPredicateType(LeftAR, Pred))
622 return;
623
624 // Check if extending the current DesiredPeelCount lets us evaluate Pred
625 // or !Pred in the loop body statically.
626 unsigned NewPeelCount = DesiredPeelCount;
627
628 const SCEV *IterVal = LeftAR->evaluateAtIteration(
629 SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
630
631 // If the original condition is not known, get the negated predicate
632 // (which holds on the else branch) and check if it is known. This allows
633 // us to peel of iterations that make the original condition false.
634 if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
636
637 const SCEV *Step = LeftAR->getStepRecurrence(SE);
638 if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
639 Pred)) {
640 if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
641 DesiredPeelCountLast = 1;
642 return;
643 }
644
645 // However, for equality comparisons, that isn't always sufficient to
646 // eliminate the comparsion in loop body, we may need to peel one more
647 // iteration. See if that makes !Pred become unknown again.
648 const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
649 if (ICmpInst::isEquality(Pred) &&
650 !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
651 RightSCEV) &&
652 !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
653 SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
654 if (NewPeelCount >= MaxPeelCount)
655 return; // Need to peel one more iteration, but can't. Give up.
656 ++NewPeelCount; // Great!
657 }
658
659 DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
660 DesiredPeelCountLast = std::max(DesiredPeelCountLast, NewPeelCount);
661 };
662
663 auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) {
664 if (!MinMax->getType()->isIntegerTy())
665 return;
666 Value *LHS = MinMax->getLHS(), *RHS = MinMax->getRHS();
667 const SCEV *BoundSCEV, *IterSCEV;
668 if (L.isLoopInvariant(LHS)) {
669 BoundSCEV = SE.getSCEV(LHS);
670 IterSCEV = SE.getSCEV(RHS);
671 } else if (L.isLoopInvariant(RHS)) {
672 BoundSCEV = SE.getSCEV(RHS);
673 IterSCEV = SE.getSCEV(LHS);
674 } else
675 return;
676 const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IterSCEV);
677 // For simplicity, we support only affine recurrences.
678 if (!AddRec || !AddRec->isAffine() || AddRec->getLoop() != &L)
679 return;
680 const SCEV *Step = AddRec->getStepRecurrence(SE);
681 bool IsSigned = MinMax->isSigned();
682 // To minimize number of peeled iterations, we use strict relational
683 // predicates here.
685 if (SE.isKnownPositive(Step))
686 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
687 else if (SE.isKnownNegative(Step))
688 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
689 else
690 return;
691 // Check that AddRec is not wrapping.
692 if (!(IsSigned ? AddRec->hasNoSignedWrap() : AddRec->hasNoUnsignedWrap()))
693 return;
694 unsigned NewPeelCount = DesiredPeelCount;
695 const SCEV *IterVal = AddRec->evaluateAtIteration(
696 SE.getConstant(AddRec->getType(), NewPeelCount), SE);
697 if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step,
698 Pred)) {
699 if (shouldPeelLastIteration(L, Pred, AddRec, BoundSCEV, SE, TTI))
700 DesiredPeelCountLast = 1;
701 return;
702 }
703 DesiredPeelCount = NewPeelCount;
704 };
705
706 for (BasicBlock *BB : L.blocks()) {
707 for (Instruction &I : *BB) {
709 ComputePeelCount(SI->getCondition(), 0);
711 ComputePeelCountMinMax(MinMax);
712 }
713
714 auto *BI = dyn_cast<CondBrInst>(BB->getTerminator());
715 if (!BI)
716 continue;
717
718 // Ignore loop exit condition.
719 if (L.getLoopLatch() == BB)
720 continue;
721
722 ComputePeelCount(BI->getCondition(), 0);
723 }
724
725 return {DesiredPeelCount, DesiredPeelCountLast};
726}
727
728/// This "heuristic" exactly matches implicit behavior which used to exist
729/// inside getLoopEstimatedTripCount. It was added here to keep an
730/// improvement inside that API from causing peeling to become more aggressive.
731/// This should probably be removed.
733 BasicBlock *Latch = L->getLoopLatch();
734 if (!Latch)
735 return true;
736
737 CondBrInst *LatchBR = dyn_cast<CondBrInst>(Latch->getTerminator());
738 if (!LatchBR || !L->isLoopExiting(Latch))
739 return true;
740
741 assert((LatchBR->getSuccessor(0) == L->getHeader() ||
742 LatchBR->getSuccessor(1) == L->getHeader()) &&
743 "At least one edge out of the latch must go to the header");
744
746 L->getUniqueNonLatchExitBlocks(ExitBlocks);
747 return any_of(ExitBlocks, [](const BasicBlock *EB) {
748 return !EB->getTerminatingDeoptimizeCall();
749 });
750}
751
752
753// Return the number of iterations we want to peel off.
754void llvm::computePeelCount(Loop *L, unsigned LoopSize,
756 unsigned TripCount, DominatorTree &DT,
758 AssumptionCache *AC, unsigned Threshold) {
759 assert(LoopSize > 0 && "Zero loop size is not allowed!");
760 // Save the PP.PeelCount value set by the target in
761 // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
762 unsigned TargetPeelCount = PP.PeelCount;
763 PP.PeelCount = 0;
764 PP.PeelLast = false;
765 if (!canPeel(L))
766 return;
767
768 // Only try to peel innermost loops by default.
769 // The constraint can be relaxed by the target in TTI.getPeelingPreferences
770 // or by the flag -unroll-allow-loop-nests-peeling.
771 if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
772 return;
773
774 // If the user provided a peel count, use that.
775 bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
776 if (UserPeelCount) {
777 LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
778 << " iterations.\n");
780 PP.PeelProfiledIterations = true;
781 return;
782 }
783
784 // Skip peeling if it's disabled.
785 if (!PP.AllowPeeling)
786 return;
787
788 // Check that we can peel at least one iteration.
789 if (2 * LoopSize > Threshold)
790 return;
791
792 unsigned AlreadyPeeled = 0;
794 AlreadyPeeled = *Peeled;
795 // Stop if we already peeled off the maximum number of iterations.
796 if (AlreadyPeeled >= UnrollPeelMaxCount)
797 return;
798
799 // Pay respect to limitations implied by loop size and the max peel count.
800 unsigned MaxPeelCount = UnrollPeelMaxCount;
801 MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
802
803 // Start the max computation with the PP.PeelCount value set by the target
804 // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
805 unsigned DesiredPeelCount = TargetPeelCount;
806
807 // Here we try to get rid of Phis which become invariants or inductions after
808 // 1, 2, ..., N iterations of the loop. For this we compute the number for
809 // iterations after which every Phi is guaranteed to become an invariant or an
810 // induction, and try to peel the maximum number of iterations among these
811 // values, thus turning all those Phis into invariants or inductions.
812 if (MaxPeelCount > DesiredPeelCount) {
813 // Check how many iterations are useful for resolving Phis
814 auto NumPeels = PhiAnalyzer(*L, MaxPeelCount, EnablePeelingForIV)
815 .calculateIterationsToPeel();
816 if (NumPeels)
817 DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
818 }
819
820 const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
821 countToEliminateCompares(*L, MaxPeelCount, SE, TTI);
822 DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps);
823
824 if (DesiredPeelCount == 0)
825 DesiredPeelCount = peelToTurnInvariantLoadsDereferenceable(*L, DT, AC);
826
827 if (DesiredPeelCount > 0) {
828 DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
829 // Consider max peel count limitation.
830 assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
831 if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
832 LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
833 << " iteration(s) to turn"
834 << " some Phis into invariants or inductions.\n");
835 PP.PeelCount = DesiredPeelCount;
836 PP.PeelProfiledIterations = false;
837 PP.PeelLast = false;
838 return;
839 }
840 }
841
842 if (CountToEliminateCmpsLast > 0) {
843 unsigned DesiredPeelCountLast =
844 std::min(CountToEliminateCmpsLast, MaxPeelCount);
845 // Consider max peel count limitation.
846 assert(DesiredPeelCountLast > 0 && "Wrong loop size estimation?");
847 if (DesiredPeelCountLast + AlreadyPeeled <= UnrollPeelMaxCount) {
848 LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
849 << " iteration(s) to turn"
850 << " some Phis into invariants.\n");
851 PP.PeelCount = DesiredPeelCountLast;
852 PP.PeelProfiledIterations = false;
853 PP.PeelLast = true;
854 return;
855 }
856 }
857
858 // Bail if we know the statically calculated trip count.
859 // In this case we rather prefer partial unrolling.
860 if (TripCount)
861 return;
862
863 // Do not apply profile base peeling if it is disabled.
865 return;
866 // If we don't know the trip count, but have reason to believe the average
867 // trip count is low, peeling should be beneficial, since we will usually
868 // hit the peeled section.
869 // We only do this in the presence of profile information, since otherwise
870 // our estimates of the trip count are not reliable enough.
871 if (L->getHeader()->getParent()->hasProfileData()) {
873 return;
874 std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
875 if (!EstimatedTripCount)
876 return;
877
878 LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
879 << *EstimatedTripCount << "\n");
880
881 std::optional<unsigned> TotalPeeled =
882 llvm::checkedAddUnsigned(*EstimatedTripCount, AlreadyPeeled);
883 if (TotalPeeled && *TotalPeeled <= MaxPeelCount) {
884 unsigned PeelCount = *EstimatedTripCount;
885 LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
886 PP.PeelCount = PeelCount;
887 return;
888 }
889 LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
890 LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
891 LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
892 LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
893 LLVM_DEBUG(dbgs() << "Max peel count by cost: "
894 << (Threshold / LoopSize - 1) << "\n");
895 }
896}
897
898/// Clones the body of the loop L, putting it between \p InsertTop and \p
899/// InsertBot.
900/// \param IterNumber The serial number of the iteration currently being
901/// peeled off.
902/// \param PeelLast Peel off the last iterations from \p L.
903/// \param ExitEdges The exit edges of the original loop.
904/// \param[out] NewBlocks A list of the blocks in the newly created clone
905/// \param[out] VMap The value map between the loop and the new clone.
906/// \param LoopBlocks A helper for DFS-traversal of the loop.
907/// \param LVMap A value-map that maps instructions from the original loop to
908/// instructions in the last peeled-off iteration.
909static void cloneLoopBlocks(
910 Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
911 BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
912 SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
913 SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
915 LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
916 ScalarEvolution &SE) {
917 BasicBlock *Header = L->getHeader();
918 BasicBlock *Latch = L->getLoopLatch();
919 BasicBlock *PreHeader = L->getLoopPreheader();
920
921 Function *F = Header->getParent();
922 LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
923 LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
924 Loop *ParentLoop = L->getParentLoop();
925
926 // For each block in the original loop, create a new copy,
927 // and update the value map with the newly created values.
928 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
929 BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
930 NewBlocks.push_back(NewBB);
931
932 // If an original block is an immediate child of the loop L, its copy
933 // is a child of a ParentLoop after peeling. If a block is a child of
934 // a nested loop, it is handled in the cloneLoop() call below.
935 if (ParentLoop && LI->getLoopFor(*BB) == L)
936 ParentLoop->addBasicBlockToLoop(NewBB, *LI);
937
938 VMap[*BB] = NewBB;
939
940 // If dominator tree is available, insert nodes to represent cloned blocks.
941 if (DT) {
942 if (Header == *BB)
943 DT->addNewBlock(NewBB, InsertTop);
944 else {
945 DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
946 // VMap must contain entry for IDom, as the iteration order is RPO.
947 DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
948 }
949 }
950 }
951
952 {
953 // Identify what other metadata depends on the cloned version. After
954 // cloning, replace the metadata with the corrected version for both
955 // memory instructions and noalias intrinsics.
956 std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
957 cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
958 Header->getContext(), Ext);
959 }
960
961 // Recursively create the new Loop objects for nested loops, if any,
962 // to preserve LoopInfo.
963 for (Loop *ChildLoop : *L) {
964 cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
965 }
966
967 // Hook-up the control flow for the newly inserted blocks.
968 // The new header is hooked up directly to the "top", which is either
969 // the original loop preheader (for the first iteration) or the previous
970 // iteration's exiting block (for every other iteration)
971 InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
972
973 // Similarly, for the latch:
974 // The original exiting edge is still hooked up to the loop exit.
975 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
976 if (PeelLast) {
977 // This is the last iteration and we definitely will go to the exit. Just
978 // set both successors to InsertBot and let the branch be simplified later.
979 assert(IterNumber == 0 && "Only peeling a single iteration implemented.");
980 auto *LatchTerm = cast<CondBrInst>(NewLatch->getTerminator());
981 LatchTerm->setSuccessor(0, InsertBot);
982 LatchTerm->setSuccessor(1, InsertBot);
983 } else {
984 auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator());
985 // The backedge now goes to the "bottom", which is either the loop's real
986 // header (for the last peeled iteration) or the copied header of the next
987 // iteration (for every other iteration)
988 for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) {
989 if (LatchTerm->getSuccessor(idx) == Header) {
990 LatchTerm->setSuccessor(idx, InsertBot);
991 break;
992 }
993 }
994 }
995 if (DT)
996 DT->changeImmediateDominator(InsertBot, NewLatch);
997
998 // The new copy of the loop body starts with a bunch of PHI nodes
999 // that pick an incoming value from either the preheader, or the previous
1000 // loop iteration. Since this copy is no longer part of the loop, we
1001 // resolve this statically:
1002 if (PeelLast) {
1003 // For the last iteration, we introduce new phis for each header phi in
1004 // InsertTop, using the incoming value from the preheader for the original
1005 // preheader (when skipping the main loop) and the incoming value from the
1006 // latch for the latch (when continuing from the main loop).
1007 IRBuilder<> B(InsertTop, InsertTop->getFirstNonPHIIt());
1008 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1009 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
1010 PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
1011 NewPHI->eraseFromParent();
1012 if (OrigPreHeader)
1013 PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
1014 OrigPreHeader);
1015
1016 PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
1017 Latch);
1018 VMap[&*I] = PN;
1019 }
1020 } else {
1021 // For the first iteration, we use the value from the preheader directly.
1022 // For any other iteration, we replace the phi with the value generated by
1023 // the immediately preceding clone of the loop body (which represents
1024 // the previous iteration).
1025 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1026 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
1027 if (IterNumber == 0) {
1028 VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
1029 } else {
1030 Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
1031 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
1032 if (LatchInst && L->contains(LatchInst))
1033 VMap[&*I] = LVMap[LatchInst];
1034 else
1035 VMap[&*I] = LatchVal;
1036 }
1037 NewPHI->eraseFromParent();
1038 }
1039 }
1040
1041 // Fix up the outgoing values - we need to add a value for the iteration
1042 // we've just created. Note that this must happen *after* the incoming
1043 // values are adjusted, since the value going out of the latch may also be
1044 // a value coming into the header.
1045 for (auto Edge : ExitEdges)
1046 for (PHINode &PHI : Edge.second->phis()) {
1047 Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
1048 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
1049 if (LatchInst && L->contains(LatchInst))
1050 LatchVal = VMap[LatchVal];
1051 PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
1053 }
1054
1055 // LastValueMap is updated with the values for the current loop
1056 // which are used the next time this function is called.
1057 for (auto KV : VMap)
1058 LVMap[KV.first] = KV.second;
1059}
1060
1061TargetTransformInfo::PeelingPreferences
1063 const TargetTransformInfo &TTI,
1064 std::optional<bool> UserAllowPeeling,
1065 std::optional<bool> UserAllowProfileBasedPeeling,
1066 bool UnrollingSpecficValues) {
1068
1069 // Set the default values.
1070 PP.PeelCount = 0;
1071 PP.AllowPeeling = true;
1072 PP.AllowLoopNestsPeeling = false;
1073 PP.PeelLast = false;
1074 PP.PeelProfiledIterations = true;
1075
1076 // Get the target specifc values.
1077 TTI.getPeelingPreferences(L, SE, PP);
1078
1079 // User specified values using cl::opt.
1080 if (UnrollingSpecficValues) {
1081 if (UnrollPeelCount.getNumOccurrences() > 0)
1083 if (UnrollAllowPeeling.getNumOccurrences() > 0)
1085 if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
1087 }
1088
1089 // User specifed values provided by argument.
1090 if (UserAllowPeeling)
1091 PP.AllowPeeling = *UserAllowPeeling;
1092 if (UserAllowProfileBasedPeeling)
1093 PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
1094
1095 return PP;
1096}
1097
1098/// Peel off the first \p PeelCount iterations of loop \p L.
1099///
1100/// Note that this does not peel them off as a single straight-line block.
1101/// Rather, each iteration is peeled off separately, and needs to check the
1102/// exit condition.
1103/// For loops that dynamically execute \p PeelCount iterations or less
1104/// this provides a benefit, since the peeled off iterations, which account
1105/// for the bulk of dynamic execution, can be further simplified by scalar
1106/// optimizations.
1107void llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1109 bool PreserveLCSSA, ValueToValueMapTy &LVMap) {
1110 assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
1111 assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
1112 assert((!PeelLast || (canPeelLastIteration(*L, *SE) && PeelCount == 1)) &&
1113 "when peeling the last iteration, the loop must be supported and can "
1114 "only peel a single iteration");
1115
1116 LoopBlocksDFS LoopBlocks(L);
1117 LoopBlocks.perform(LI);
1118
1119 BasicBlock *Header = L->getHeader();
1120 BasicBlock *PreHeader = L->getLoopPreheader();
1121 BasicBlock *Latch = L->getLoopLatch();
1123 L->getExitEdges(ExitEdges);
1124
1125 // Remember dominators of blocks we might reach through exits to change them
1126 // later. Immediate dominator of such block might change, because we add more
1127 // routes which can lead to the exit: we can reach it from the peeled
1128 // iterations too.
1129 MapVector<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
1130 for (auto *BB : L->blocks()) {
1131 auto *BBDomNode = DT.getNode(BB);
1132 SmallVector<BasicBlock *, 16> ChildrenToUpdate;
1133 for (auto *ChildDomNode : BBDomNode->children()) {
1134 auto *ChildBB = ChildDomNode->getBlock();
1135 if (!L->contains(ChildBB))
1136 ChildrenToUpdate.push_back(ChildBB);
1137 }
1138 // The new idom of the block will be the nearest common dominator
1139 // of all copies of the previous idom. This is equivalent to the
1140 // nearest common dominator of the previous idom and the first latch,
1141 // which dominates all copies of the previous idom.
1142 BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
1143 for (auto *ChildBB : ChildrenToUpdate)
1144 NonLoopBlocksIDom[ChildBB] = NewIDom;
1145 }
1146
1147 Function *F = Header->getParent();
1148
1149 // Set up all the necessary basic blocks.
1150 BasicBlock *InsertTop;
1151 BasicBlock *InsertBot;
1152 BasicBlock *NewPreHeader = nullptr;
1154 if (PeelLast) {
1155 // It is convenient to split the single exit block from the latch the
1156 // into 3 parts - two blocks to anchor the peeled copy of the loop body,
1157 // and a new final exit block.
1158
1159 // Peeling the last iteration transforms.
1160 //
1161 // PreHeader:
1162 // ...
1163 // Header:
1164 // LoopBody
1165 // If (cond) goto Header
1166 // Exit:
1167 //
1168 // into
1169 //
1170 // Header:
1171 // LoopBody
1172 // If (cond) goto Header
1173 // InsertTop:
1174 // LoopBody
1175 // If (!cond) goto InsertBot
1176 // InsertBot:
1177 // Exit:
1178 // ...
1179 BasicBlock *Exit = L->getExitBlock();
1180 for (PHINode &P : Exit->phis())
1181 ExitValues[&P] = P.getIncomingValueForBlock(Latch);
1182
1183 const SCEV *BTC = SE->getBackedgeTakenCount(L);
1184
1185 InsertTop = SplitEdge(Latch, Exit, &DT, LI);
1186 InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
1187
1188 InsertTop->setName(Exit->getName() + ".peel.begin");
1189 InsertBot->setName(Exit->getName() + ".peel.next");
1190 NewPreHeader = nullptr;
1191
1192 // If the original loop may only execute a single iteration we need to
1193 // insert a trip count check and skip the original loop with the last
1194 // iteration peeled off if necessary. Either way, we must update branch
1195 // weights to maintain the loop body frequency.
1196 if (SE->isKnownNonZero(BTC)) {
1197 // We have just proven that, when reached, the original loop always
1198 // executes at least two iterations. Thus, we unconditionally execute
1199 // both the remaining loop's initial iteration and the peeled iteration.
1200 // But that increases the latter's frequency above its frequency in the
1201 // original loop. To maintain the total frequency, we compensate by
1202 // decreasing the remaining loop body's frequency to indicate one less
1203 // iteration.
1204 //
1205 // We use this formula to convert probability to/from frequency:
1206 // Sum(i=0..inf)(P^i) = 1/(1-P) = Freq.
1207 if (BranchProbability P = getLoopProbability(L); !P.isUnknown()) {
1208 // Trying to subtract one from an infinite loop is pointless, and our
1209 // formulas then produce division by zero, so skip that case.
1210 if (BranchProbability ExitP = P.getCompl(); !ExitP.isZero()) {
1211 double Freq = 1 / ExitP.toDouble();
1212 // No branch weights can produce a frequency of less than one given
1213 // the initial iteration, and our formulas produce a negative
1214 // probability if we try.
1215 assert(Freq >= 1.0 && "expected freq >= 1 due to initial iteration");
1216 double NewFreq = std::max(Freq - 1, 1.0);
1218 L, BranchProbability::getBranchProbability(1 - 1 / NewFreq));
1219 }
1220 }
1221 } else {
1222 NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
1223 SCEVExpander Expander(*SE, "loop-peel");
1224
1225 Instruction *PreHeaderBR = PreHeader->getTerminator();
1226 Value *BTCValue =
1227 Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
1228 IRBuilder<> B(PreHeaderBR);
1229 Value *Cond =
1230 B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
1231 auto *BI = B.CreateCondBr(Cond, NewPreHeader, InsertTop);
1232 SmallVector<uint32_t> Weights;
1233 auto *OrigLatchBr = Latch->getTerminator();
1234 auto HasBranchWeights = !ProfcheckDisableMetadataFixes &&
1235 extractBranchWeights(*OrigLatchBr, Weights);
1236 if (HasBranchWeights) {
1237 // The probability that the new guard skips the loop to execute just one
1238 // iteration is the original loop's probability of exiting at the latch
1239 // after any iteration. That should maintain the original loop body
1240 // frequency. Upon arriving at the loop, due to the guard, the
1241 // probability of reaching iteration i of the new loop is the
1242 // probability of reaching iteration i+1 of the original loop. The
1243 // probability of reaching the peeled iteration is 1, which is the
1244 // probability of reaching iteration 0 of the original loop.
1245 if (L->getExitBlock() == OrigLatchBr->getSuccessor(0))
1246 std::swap(Weights[0], Weights[1]);
1247 setBranchWeights(*BI, Weights, /*IsExpected=*/false);
1248 }
1249 PreHeaderBR->eraseFromParent();
1250
1251 // PreHeader now dominates InsertTop.
1252 DT.changeImmediateDominator(InsertTop, PreHeader);
1253 }
1254 } else {
1255 // It is convenient to split the preheader into 3 parts - two blocks to
1256 // anchor the peeled copy of the loop body, and a new preheader for the
1257 // "real" loop.
1258
1259 // Peeling the first iteration transforms.
1260 //
1261 // PreHeader:
1262 // ...
1263 // Header:
1264 // LoopBody
1265 // If (cond) goto Header
1266 // Exit:
1267 //
1268 // into
1269 //
1270 // InsertTop:
1271 // LoopBody
1272 // If (!cond) goto Exit
1273 // InsertBot:
1274 // NewPreHeader:
1275 // ...
1276 // Header:
1277 // LoopBody
1278 // If (cond) goto Header
1279 // Exit:
1280 //
1281 // Each following iteration will split the current bottom anchor in two,
1282 // and put the new copy of the loop body between these two blocks. That
1283 // is, after peeling another iteration from the example above, we'll
1284 // split InsertBot, and get:
1285 //
1286 // InsertTop:
1287 // LoopBody
1288 // If (!cond) goto Exit
1289 // InsertBot:
1290 // LoopBody
1291 // If (!cond) goto Exit
1292 // InsertBot.next:
1293 // NewPreHeader:
1294 // ...
1295 // Header:
1296 // LoopBody
1297 // If (cond) goto Header
1298 // Exit:
1299 //
1300 InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
1301 InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
1302 NewPreHeader = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
1303
1304 InsertTop->setName(Header->getName() + ".peel.begin");
1305 InsertBot->setName(Header->getName() + ".peel.next");
1306 NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
1307 }
1308
1309 Instruction *LatchTerm =
1310 cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());
1311
1312 // Identify what noalias metadata is inside the loop: if it is inside the
1313 // loop, the associated metadata must be cloned for each iteration.
1314 SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
1315 identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
1316
1317 // For each peeled-off iteration, make a copy of the loop.
1318 ValueToValueMapTy VMap;
1319 for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
1321
1322 cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
1323 NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
1324 LoopBlocks, VMap, LVMap, &DT, LI,
1325 LoopLocalNoAliasDeclScopes, *SE);
1326
1327 // Remap to use values from the current iteration instead of the
1328 // previous one.
1329 remapInstructionsInBlocks(NewBlocks, VMap);
1330
1331 if (Iter == 0) {
1332 if (PeelLast) {
1333 // Adjust the exit condition so the loop exits one iteration early.
1334 // For now we simply subtract one form the second operand of the
1335 // exit condition. This relies on the peel count computation to
1336 // check that this is actually legal. In particular, it ensures that
1337 // the first operand of the compare is an AddRec with step 1 and we
1338 // execute more than one iteration.
1339 auto *Cmp =
1340 cast<ICmpInst>(L->getLoopLatch()->getTerminator()->getOperand(0));
1341 IRBuilder B(Cmp);
1342 Cmp->setOperand(
1343 1, B.CreateSub(Cmp->getOperand(1),
1344 ConstantInt::get(Cmp->getOperand(1)->getType(), 1)));
1345 } else {
1346 // Update IDoms of the blocks reachable through exits.
1347 for (auto BBIDom : NonLoopBlocksIDom)
1348 DT.changeImmediateDominator(BBIDom.first,
1349 cast<BasicBlock>(LVMap[BBIDom.second]));
1350 }
1351 }
1352
1353#ifdef EXPENSIVE_CHECKS
1354 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1355#endif
1356
1357 // Remove Loop metadata from the latch branch instruction
1358 // because it is not the Loop's latch branch anymore.
1359 auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
1360 LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr);
1361
1362 InsertTop = InsertBot;
1363 InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
1364 InsertBot->setName(Header->getName() + ".peel.next");
1365
1366 F->splice(InsertTop->getIterator(), F, NewBlocks[0]->getIterator(),
1367 F->end());
1368 }
1369
1370 if (PeelLast) {
1371 // Now adjust users of the original exit values by replacing them with the
1372 // exit value from the peeled iteration and remove them.
1373 for (const auto &[P, E] : ExitValues) {
1374 Instruction *ExitInst = dyn_cast<Instruction>(E);
1375 if (ExitInst && L->contains(ExitInst))
1376 P->replaceAllUsesWith(&*VMap[ExitInst]);
1377 else
1378 P->replaceAllUsesWith(E);
1379 P->eraseFromParent();
1380 }
1381 formLCSSA(*L, DT, LI, SE);
1382 } else {
1383 // Now adjust the phi nodes in the loop header to get their initial values
1384 // from the last peeled-off iteration instead of the preheader.
1385 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1387 Value *NewVal = PHI->getIncomingValueForBlock(Latch);
1388 Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
1389 if (LatchInst && L->contains(LatchInst))
1390 NewVal = LVMap[LatchInst];
1391
1392 PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
1393 }
1394 }
1395
1396 // Update Metadata for count of peeled off iterations.
1397 unsigned AlreadyPeeled = 0;
1399 AlreadyPeeled = *Peeled;
1400 unsigned TotalPeeled = AlreadyPeeled + PeelCount;
1402
1403 // Update metadata for the estimated trip count. The original branch weight
1404 // metadata is already correct for both the remaining loop and the peeled loop
1405 // iterations, so do not adjust it.
1406 //
1407 // For example, consider what happens when peeling 2 iterations from a loop
1408 // with an estimated trip count of 10 and inserting them before the remaining
1409 // loop. Each of the peeled iterations and each iteration in the remaining
1410 // loop still has the same probability of exiting the *entire original* loop
1411 // as it did when in the original loop, and thus it should still have the same
1412 // branch weights. The peeled iterations' non-zero probabilities of exiting
1413 // already appropriately reduce the probability of reaching the remaining
1414 // iterations just as they did in the original loop. Trying to also adjust
1415 // the remaining loop's branch weights to reflect its new trip count of 8 will
1416 // erroneously further reduce its block frequencies. However, in case an
1417 // analysis later needs to determine the trip count of the remaining loop
1418 // while examining it in isolation without considering the probability of
1419 // actually reaching it, we store the new trip count as separate metadata.
1420 if (auto EstimatedTripCount = getLoopEstimatedTripCount(L)) {
1421 unsigned EstimatedTripCountNew = *EstimatedTripCount;
1422 if (EstimatedTripCountNew < TotalPeeled)
1423 EstimatedTripCountNew = 0;
1424 else
1425 EstimatedTripCountNew -= TotalPeeled;
1426 setLoopEstimatedTripCount(L, EstimatedTripCountNew);
1427 }
1428
1429 if (Loop *ParentLoop = L->getParentLoop())
1430 L = ParentLoop;
1431
1432 // We modified the loop, update SE.
1433 SE->forgetTopmostLoop(L);
1435
1436#ifdef EXPENSIVE_CHECKS
1437 // Finally DomtTree must be correct.
1438 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1439#endif
1440
1441 // FIXME: Incrementally update loop-simplify
1442 simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
1443
1444 NumPeeled++;
1445 NumPeeledEnd += PeelLast;
1446}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseMap class.
static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, const SCEVAddRecExpr *LeftAR, const SCEV *RightSCEV, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Returns true if the last iteration can be peeled off and the condition (Pred LeftAR,...
Definition LoopPeel.cpp:513
static bool violatesLegacyMultiExitLoopCheck(Loop *L)
This "heuristic" exactly matches implicit behavior which used to exist inside getLoopEstimatedTripCou...
Definition LoopPeel.cpp:732
static std::pair< unsigned, unsigned > countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Definition LoopPeel.cpp:551
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *OrigPreHeader, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &ExitEdges, SmallVectorImpl< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI, ArrayRef< MDNode * > LoopLocalNoAliasDeclScopes, ScalarEvolution &SE)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
Definition LoopPeel.cpp:909
static unsigned peelToTurnInvariantLoadsDereferenceable(Loop &L, DominatorTree &DT, AssumptionCache *AC)
Definition LoopPeel.cpp:424
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
#define P(N)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2868
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Store the result of a depth first search within basic blocks contained by a single loop.
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
LLVM_ABI void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
RPOIterator endRPO() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
This class represents min/max intrinsics.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This node represents a polynomial recurrence on the trip count of the specified loop.
LLVM_ABI const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)
Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be ...
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
LLVM_ABI std::optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI std::optional< bool > evaluatePredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Check whether the condition described by Pred, LHS, and RHS is true or false.
This class represents the LLVM 'select' instruction.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:394
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
self_iterator getIterator()
Definition ilist_node.h:123
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_BasicBlock()
Match an arbitrary basic block value and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
cst_pred_ty< is_one > m_scev_One()
Match an integer 1.
specificloop_ty m_SpecificLoop(const Loop *L)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
LLVM_ABI std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Return either:
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
static cl::opt< bool > DisableAdvancedPeeling("disable-advanced-peeling", cl::init(false), cl::Hidden, cl::desc("Disable advance peeling. Issues for convergent targets (D134803)."))
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
static cl::opt< bool > EnablePeelingForIV("enable-peeling-for-iv", cl::init(false), cl::Hidden, cl::desc("Enable peeling to convert Phi nodes into IVs"))
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool canPeel(const Loop *L)
Definition LoopPeel.cpp:97
LLVM_ABI bool canPeelLastIteration(const Loop &L, ScalarEvolution &SE)
Returns true if the last iteration of L can be peeled off.
Definition LoopPeel.cpp:482
LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static const char * PeeledCountMetaData
Definition LoopPeel.cpp:91
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
Definition LoopPeel.cpp:754
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
LLVM_ABI BranchProbability getLoopProbability(Loop *L)
Based on branch weight metadata, return either:
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peele...
LLVM_ABI std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
LLVM_ABI bool setLoopProbability(Loop *L, BranchProbability P)
Set branch weight metadata for the latch of L to indicate that, at the end of any iteration,...
TargetTransformInfo TTI
static cl::opt< bool > UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled."))
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
std::enable_if_t< std::is_unsigned_v< T >, std::optional< T > > checkedAddUnsigned(T LHS, T RHS)
Add two unsigned integers LHS and RHS.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const SimplifyQuery &Q, bool IgnoreFree=false)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:265
LLVM_ABI void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition LCSSA.cpp:427
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
LLVM_ABI Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelLast
Peel off the last PeelCount loop iterations.
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...