LLVM  16.0.0git
LoopPredication.cpp
Go to the documentation of this file.
1 //===-- LoopPredication.cpp - Guard based loop predication pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The LoopPredication pass tries to convert loop variant range checks to loop
10 // invariant by widening checks across loop iterations. For example, it will
11 // convert
12 //
13 // for (i = 0; i < n; i++) {
14 // guard(i < len);
15 // ...
16 // }
17 //
18 // to
19 //
20 // for (i = 0; i < n; i++) {
21 // guard(n - 1 < len);
22 // ...
23 // }
24 //
25 // After this transformation the condition of the guard is loop invariant, so
26 // loop-unswitch can later unswitch the loop by this condition which basically
27 // predicates the loop by the widened condition:
28 //
29 // if (n - 1 < len)
30 // for (i = 0; i < n; i++) {
31 // ...
32 // }
33 // else
34 // deoptimize
35 //
36 // It's tempting to rely on SCEV here, but it has proven to be problematic.
37 // Generally the facts SCEV provides about the increment step of add
38 // recurrences are true if the backedge of the loop is taken, which implicitly
39 // assumes that the guard doesn't fail. Using these facts to optimize the
40 // guard results in a circular logic where the guard is optimized under the
41 // assumption that it never fails.
42 //
43 // For example, in the loop below the induction variable will be marked as nuw
44 // basing on the guard. Basing on nuw the guard predicate will be considered
45 // monotonic. Given a monotonic condition it's tempting to replace the induction
46 // variable in the condition with its value on the last iteration. But this
47 // transformation is not correct, e.g. e = 4, b = 5 breaks the loop.
48 //
49 // for (int i = b; i != e; i++)
50 // guard(i u< len)
51 //
52 // One of the ways to reason about this problem is to use an inductive proof
53 // approach. Given the loop:
54 //
55 // if (B(0)) {
56 // do {
57 // I = PHI(0, I.INC)
58 // I.INC = I + Step
59 // guard(G(I));
60 // } while (B(I));
61 // }
62 //
63 // where B(x) and G(x) are predicates that map integers to booleans, we want a
64 // loop invariant expression M such the following program has the same semantics
65 // as the above:
66 //
67 // if (B(0)) {
68 // do {
69 // I = PHI(0, I.INC)
70 // I.INC = I + Step
71 // guard(G(0) && M);
72 // } while (B(I));
73 // }
74 //
75 // One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step)
76 //
77 // Informal proof that the transformation above is correct:
78 //
79 // By the definition of guards we can rewrite the guard condition to:
80 // G(I) && G(0) && M
81 //
82 // Let's prove that for each iteration of the loop:
83 // G(0) && M => G(I)
84 // And the condition above can be simplified to G(Start) && M.
85 //
86 // Induction base.
87 // G(0) && M => G(0)
88 //
89 // Induction step. Assuming G(0) && M => G(I) on the subsequent
90 // iteration:
91 //
92 // B(I) is true because it's the backedge condition.
93 // G(I) is true because the backedge is guarded by this condition.
94 //
95 // So M = forall X . (G(X) && B(X)) => G(X + Step) implies G(I + Step).
96 //
97 // Note that we can use anything stronger than M, i.e. any condition which
98 // implies M.
99 //
100 // When S = 1 (i.e. forward iterating loop), the transformation is supported
101 // when:
102 // * The loop has a single latch with the condition of the form:
103 // B(X) = latchStart + X <pred> latchLimit,
104 // where <pred> is u<, u<=, s<, or s<=.
105 // * The guard condition is of the form
106 // G(X) = guardStart + X u< guardLimit
107 //
108 // For the ult latch comparison case M is:
109 // forall X . guardStart + X u< guardLimit && latchStart + X <u latchLimit =>
110 // guardStart + X + 1 u< guardLimit
111 //
112 // The only way the antecedent can be true and the consequent can be false is
113 // if
114 // X == guardLimit - 1 - guardStart
115 // (and guardLimit is non-zero, but we won't use this latter fact).
116 // If X == guardLimit - 1 - guardStart then the second half of the antecedent is
117 // latchStart + guardLimit - 1 - guardStart u< latchLimit
118 // and its negation is
119 // latchStart + guardLimit - 1 - guardStart u>= latchLimit
120 //
121 // In other words, if
122 // latchLimit u<= latchStart + guardLimit - 1 - guardStart
123 // then:
124 // (the ranges below are written in ConstantRange notation, where [A, B) is the
125 // set for (I = A; I != B; I++ /*maywrap*/) yield(I);)
126 //
127 // forall X . guardStart + X u< guardLimit &&
128 // latchStart + X u< latchLimit =>
129 // guardStart + X + 1 u< guardLimit
130 // == forall X . guardStart + X u< guardLimit &&
131 // latchStart + X u< latchStart + guardLimit - 1 - guardStart =>
132 // guardStart + X + 1 u< guardLimit
133 // == forall X . (guardStart + X) in [0, guardLimit) &&
134 // (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) =>
135 // (guardStart + X + 1) in [0, guardLimit)
136 // == forall X . X in [-guardStart, guardLimit - guardStart) &&
137 // X in [-latchStart, guardLimit - 1 - guardStart) =>
138 // X in [-guardStart - 1, guardLimit - guardStart - 1)
139 // == true
140 //
141 // So the widened condition is:
142 // guardStart u< guardLimit &&
143 // latchStart + guardLimit - 1 - guardStart u>= latchLimit
144 // Similarly for ule condition the widened condition is:
145 // guardStart u< guardLimit &&
146 // latchStart + guardLimit - 1 - guardStart u> latchLimit
147 // For slt condition the widened condition is:
148 // guardStart u< guardLimit &&
149 // latchStart + guardLimit - 1 - guardStart s>= latchLimit
150 // For sle condition the widened condition is:
151 // guardStart u< guardLimit &&
152 // latchStart + guardLimit - 1 - guardStart s> latchLimit
153 //
154 // When S = -1 (i.e. reverse iterating loop), the transformation is supported
155 // when:
156 // * The loop has a single latch with the condition of the form:
157 // B(X) = X <pred> latchLimit, where <pred> is u>, u>=, s>, or s>=.
158 // * The guard condition is of the form
159 // G(X) = X - 1 u< guardLimit
160 //
161 // For the ugt latch comparison case M is:
162 // forall X. X-1 u< guardLimit and X u> latchLimit => X-2 u< guardLimit
163 //
164 // The only way the antecedent can be true and the consequent can be false is if
165 // X == 1.
166 // If X == 1 then the second half of the antecedent is
167 // 1 u> latchLimit, and its negation is latchLimit u>= 1.
168 //
169 // So the widened condition is:
170 // guardStart u< guardLimit && latchLimit u>= 1.
171 // Similarly for sgt condition the widened condition is:
172 // guardStart u< guardLimit && latchLimit s>= 1.
173 // For uge condition the widened condition is:
174 // guardStart u< guardLimit && latchLimit u> 1.
175 // For sge condition the widened condition is:
176 // guardStart u< guardLimit && latchLimit s> 1.
177 //===----------------------------------------------------------------------===//
178 
180 #include "llvm/ADT/Statistic.h"
184 #include "llvm/Analysis/LoopInfo.h"
185 #include "llvm/Analysis/LoopPass.h"
186 #include "llvm/Analysis/MemorySSA.h"
190 #include "llvm/IR/Function.h"
191 #include "llvm/IR/IntrinsicInst.h"
192 #include "llvm/IR/Module.h"
193 #include "llvm/IR/PatternMatch.h"
194 #include "llvm/InitializePasses.h"
195 #include "llvm/Pass.h"
197 #include "llvm/Support/Debug.h"
198 #include "llvm/Transforms/Scalar.h"
203 #include <optional>
204 
205 #define DEBUG_TYPE "loop-predication"
206 
207 STATISTIC(TotalConsidered, "Number of guards considered");
208 STATISTIC(TotalWidened, "Number of checks widened");
209 
210 using namespace llvm;
211 
212 static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",
213  cl::Hidden, cl::init(true));
214 
215 static cl::opt<bool> EnableCountDownLoop("loop-predication-enable-count-down-loop",
216  cl::Hidden, cl::init(true));
217 
218 static cl::opt<bool>
219  SkipProfitabilityChecks("loop-predication-skip-profitability-checks",
220  cl::Hidden, cl::init(false));
221 
222 // This is the scale factor for the latch probability. We use this during
223 // profitability analysis to find other exiting blocks that have a much higher
224 // probability of exiting the loop instead of loop exiting via latch.
225 // This value should be greater than 1 for a sane profitability check.
227  "loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0),
228  cl::desc("scale factor for the latch probability. Value should be greater "
229  "than 1. Lower values are ignored"));
230 
232  "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden,
233  cl::desc("Whether or not we should predicate guards "
234  "expressed as widenable branches to deoptimize blocks"),
235  cl::init(true));
236 
238  "loop-predication-insert-assumes-of-predicated-guards-conditions",
239  cl::Hidden,
240  cl::desc("Whether or not we should insert assumes of conditions of "
241  "predicated guards"),
242  cl::init(true));
243 
244 namespace {
245 /// Represents an induction variable check:
246 /// icmp Pred, <induction variable>, <loop invariant limit>
247 struct LoopICmp {
248  ICmpInst::Predicate Pred;
249  const SCEVAddRecExpr *IV;
250  const SCEV *Limit;
251  LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
252  const SCEV *Limit)
253  : Pred(Pred), IV(IV), Limit(Limit) {}
254  LoopICmp() = default;
255  void dump() {
256  dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
257  << ", Limit = " << *Limit << "\n";
258  }
259 };
260 
261 class LoopPredication {
262  AliasAnalysis *AA;
263  DominatorTree *DT;
264  ScalarEvolution *SE;
265  LoopInfo *LI;
266  MemorySSAUpdater *MSSAU;
267 
268  Loop *L;
269  const DataLayout *DL;
270  BasicBlock *Preheader;
271  LoopICmp LatchCheck;
272 
273  bool isSupportedStep(const SCEV* Step);
274  Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
275  Optional<LoopICmp> parseLoopLatchICmp();
276 
277  /// Return an insertion point suitable for inserting a safe to speculate
278  /// instruction whose only user will be 'User' which has operands 'Ops'. A
279  /// trivial result would be the at the User itself, but we try to return a
280  /// loop invariant location if possible.
281  Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops);
282  /// Same as above, *except* that this uses the SCEV definition of invariant
283  /// which is that an expression *can be made* invariant via SCEVExpander.
284  /// Thus, this version is only suitable for finding an insert point to be be
285  /// passed to SCEVExpander!
286  Instruction *findInsertPt(const SCEVExpander &Expander, Instruction *User,
288 
289  /// Return true if the value is known to produce a single fixed value across
290  /// all iterations on which it executes. Note that this does not imply
291  /// speculation safety. That must be established separately.
292  bool isLoopInvariantValue(const SCEV* S);
293 
294  Value *expandCheck(SCEVExpander &Expander, Instruction *Guard,
295  ICmpInst::Predicate Pred, const SCEV *LHS,
296  const SCEV *RHS);
297 
298  Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
299  Instruction *Guard);
300  Optional<Value *> widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck,
301  LoopICmp RangeCheck,
302  SCEVExpander &Expander,
303  Instruction *Guard);
304  Optional<Value *> widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck,
305  LoopICmp RangeCheck,
306  SCEVExpander &Expander,
307  Instruction *Guard);
308  unsigned collectChecks(SmallVectorImpl<Value *> &Checks, Value *Condition,
309  SCEVExpander &Expander, Instruction *Guard);
310  bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
311  bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander);
312  // If the loop always exits through another block in the loop, we should not
313  // predicate based on the latch check. For example, the latch check can be a
314  // very coarse grained check and there can be more fine grained exit checks
315  // within the loop.
316  bool isLoopProfitableToPredicate();
317 
318  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
319 
320 public:
322  LoopInfo *LI, MemorySSAUpdater *MSSAU)
323  : AA(AA), DT(DT), SE(SE), LI(LI), MSSAU(MSSAU){};
324  bool runOnLoop(Loop *L);
325 };
326 
327 class LoopPredicationLegacyPass : public LoopPass {
328 public:
329  static char ID;
330  LoopPredicationLegacyPass() : LoopPass(ID) {
332  }
333 
334  void getAnalysisUsage(AnalysisUsage &AU) const override {
338  }
339 
340  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
341  if (skipLoop(L))
342  return false;
343  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
344  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
345  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
346  auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
347  std::unique_ptr<MemorySSAUpdater> MSSAU;
348  if (MSSAWP)
349  MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
350  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
351  LoopPredication LP(AA, DT, SE, LI, MSSAU ? MSSAU.get() : nullptr);
352  return LP.runOnLoop(L);
353  }
354 };
355 
357 } // end namespace
358 
359 INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication",
360  "Loop predication", false, false)
363 INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication",
365 
367  return new LoopPredicationLegacyPass();
368 }
369 
372  LPMUpdater &U) {
373  std::unique_ptr<MemorySSAUpdater> MSSAU;
374  if (AR.MSSA)
375  MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
376  LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI,
377  MSSAU ? MSSAU.get() : nullptr);
378  if (!LP.runOnLoop(&L))
379  return PreservedAnalyses::all();
380 
381  auto PA = getLoopPassPreservedAnalyses();
382  if (AR.MSSA)
383  PA.preserve<MemorySSAAnalysis>();
384  return PA;
385 }
386 
388 LoopPredication::parseLoopICmp(ICmpInst *ICI) {
389  auto Pred = ICI->getPredicate();
390  auto *LHS = ICI->getOperand(0);
391  auto *RHS = ICI->getOperand(1);
392 
393  const SCEV *LHSS = SE->getSCEV(LHS);
394  if (isa<SCEVCouldNotCompute>(LHSS))
395  return None;
396  const SCEV *RHSS = SE->getSCEV(RHS);
397  if (isa<SCEVCouldNotCompute>(RHSS))
398  return None;
399 
400  // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV
401  if (SE->isLoopInvariant(LHSS, L)) {
402  std::swap(LHS, RHS);
403  std::swap(LHSS, RHSS);
404  Pred = ICmpInst::getSwappedPredicate(Pred);
405  }
406 
407  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHSS);
408  if (!AR || AR->getLoop() != L)
409  return None;
410 
411  return LoopICmp(Pred, AR, RHSS);
412 }
413 
414 Value *LoopPredication::expandCheck(SCEVExpander &Expander,
415  Instruction *Guard,
416  ICmpInst::Predicate Pred, const SCEV *LHS,
417  const SCEV *RHS) {
418  Type *Ty = LHS->getType();
419  assert(Ty == RHS->getType() && "expandCheck operands have different types?");
420 
421  if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) {
422  IRBuilder<> Builder(Guard);
423  if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
424  return Builder.getTrue();
425  if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),
426  LHS, RHS))
427  return Builder.getFalse();
428  }
429 
430  Value *LHSV =
431  Expander.expandCodeFor(LHS, Ty, findInsertPt(Expander, Guard, {LHS}));
432  Value *RHSV =
433  Expander.expandCodeFor(RHS, Ty, findInsertPt(Expander, Guard, {RHS}));
434  IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV}));
435  return Builder.CreateICmp(Pred, LHSV, RHSV);
436 }
437 
438 // Returns true if its safe to truncate the IV to RangeCheckType.
439 // When the IV type is wider than the range operand type, we can still do loop
440 // predication, by generating SCEVs for the range and latch that are of the
441 // same type. We achieve this by generating a SCEV truncate expression for the
442 // latch IV. This is done iff truncation of the IV is a safe operation,
443 // without loss of information.
444 // Another way to achieve this is by generating a wider type SCEV for the
445 // range check operand, however, this needs a more involved check that
446 // operands do not overflow. This can lead to loss of information when the
447 // range operand is of the form: add i32 %offset, %iv. We need to prove that
448 // sext(x + y) is same as sext(x) + sext(y).
449 // This function returns true if we can safely represent the IV type in
450 // the RangeCheckType without loss of information.
452  ScalarEvolution &SE,
453  const LoopICmp LatchCheck,
454  Type *RangeCheckType) {
455  if (!EnableIVTruncation)
456  return false;
457  assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()).getFixedSize() >
458  DL.getTypeSizeInBits(RangeCheckType).getFixedSize() &&
459  "Expected latch check IV type to be larger than range check operand "
460  "type!");
461  // The start and end values of the IV should be known. This is to guarantee
462  // that truncating the wide type will not lose information.
463  auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
464  auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
465  if (!Limit || !Start)
466  return false;
467  // This check makes sure that the IV does not change sign during loop
468  // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
469  // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
470  // IV wraps around, and the truncation of the IV would lose the range of
471  // iterations between 2^32 and 2^64.
472  if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred))
473  return false;
474  // The active bits should be less than the bits in the RangeCheckType. This
475  // guarantees that truncating the latch check to RangeCheckType is a safe
476  // operation.
477  auto RangeCheckTypeBitSize =
478  DL.getTypeSizeInBits(RangeCheckType).getFixedSize();
479  return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
480  Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
481 }
482 
483 
484 // Return an LoopICmp describing a latch check equivlent to LatchCheck but with
485 // the requested type if safe to do so. May involve the use of a new IV.
486 static std::optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL,
487  ScalarEvolution &SE,
488  const LoopICmp LatchCheck,
489  Type *RangeCheckType) {
490 
491  auto *LatchType = LatchCheck.IV->getType();
492  if (RangeCheckType == LatchType)
493  return LatchCheck;
494  // For now, bail out if latch type is narrower than range type.
495  if (DL.getTypeSizeInBits(LatchType).getFixedSize() <
496  DL.getTypeSizeInBits(RangeCheckType).getFixedSize())
497  return None;
498  if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType))
499  return None;
500  // We can now safely identify the truncated version of the IV and limit for
501  // RangeCheckType.
502  LoopICmp NewLatchCheck;
503  NewLatchCheck.Pred = LatchCheck.Pred;
504  NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(
505  SE.getTruncateExpr(LatchCheck.IV, RangeCheckType));
506  if (!NewLatchCheck.IV)
507  return None;
508  NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType);
509  LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType
510  << "can be represented as range check type:"
511  << *RangeCheckType << "\n");
512  LLVM_DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n");
513  LLVM_DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n");
514  return NewLatchCheck;
515 }
516 
517 bool LoopPredication::isSupportedStep(const SCEV* Step) {
518  return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
519 }
520 
521 Instruction *LoopPredication::findInsertPt(Instruction *Use,
522  ArrayRef<Value*> Ops) {
523  for (Value *Op : Ops)
524  if (!L->isLoopInvariant(Op))
525  return Use;
526  return Preheader->getTerminator();
527 }
528 
529 Instruction *LoopPredication::findInsertPt(const SCEVExpander &Expander,
530  Instruction *Use,
532  // Subtlety: SCEV considers things to be invariant if the value produced is
533  // the same across iterations. This is not the same as being able to
534  // evaluate outside the loop, which is what we actually need here.
535  for (const SCEV *Op : Ops)
536  if (!SE->isLoopInvariant(Op, L) ||
537  !Expander.isSafeToExpandAt(Op, Preheader->getTerminator()))
538  return Use;
539  return Preheader->getTerminator();
540 }
541 
542 bool LoopPredication::isLoopInvariantValue(const SCEV* S) {
543  // Handling expressions which produce invariant results, but *haven't* yet
544  // been removed from the loop serves two important purposes.
545  // 1) Most importantly, it resolves a pass ordering cycle which would
546  // otherwise need us to iteration licm, loop-predication, and either
547  // loop-unswitch or loop-peeling to make progress on examples with lots of
548  // predicable range checks in a row. (Since, in the general case, we can't
549  // hoist the length checks until the dominating checks have been discharged
550  // as we can't prove doing so is safe.)
551  // 2) As a nice side effect, this exposes the value of peeling or unswitching
552  // much more obviously in the IR. Otherwise, the cost modeling for other
553  // transforms would end up needing to duplicate all of this logic to model a
554  // check which becomes predictable based on a modeled peel or unswitch.
555  //
556  // The cost of doing so in the worst case is an extra fill from the stack in
557  // the loop to materialize the loop invariant test value instead of checking
558  // against the original IV which is presumable in a register inside the loop.
559  // Such cases are presumably rare, and hint at missing oppurtunities for
560  // other passes.
561 
562  if (SE->isLoopInvariant(S, L))
563  // Note: This the SCEV variant, so the original Value* may be within the
564  // loop even though SCEV has proven it is loop invariant.
565  return true;
566 
567  // Handle a particular important case which SCEV doesn't yet know about which
568  // shows up in range checks on arrays with immutable lengths.
569  // TODO: This should be sunk inside SCEV.
570  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
571  if (const auto *LI = dyn_cast<LoadInst>(U->getValue()))
572  if (LI->isUnordered() && L->hasLoopInvariantOperands(LI))
573  if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))) ||
574  LI->hasMetadata(LLVMContext::MD_invariant_load))
575  return true;
576  return false;
577 }
578 
579 Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
580  LoopICmp LatchCheck, LoopICmp RangeCheck,
581  SCEVExpander &Expander, Instruction *Guard) {
582  auto *Ty = RangeCheck.IV->getType();
583  // Generate the widened condition for the forward loop:
584  // guardStart u< guardLimit &&
585  // latchLimit <pred> guardLimit - 1 - guardStart + latchStart
586  // where <pred> depends on the latch condition predicate. See the file
587  // header comment for the reasoning.
588  // guardLimit - guardStart + latchStart - 1
589  const SCEV *GuardStart = RangeCheck.IV->getStart();
590  const SCEV *GuardLimit = RangeCheck.Limit;
591  const SCEV *LatchStart = LatchCheck.IV->getStart();
592  const SCEV *LatchLimit = LatchCheck.Limit;
593  // Subtlety: We need all the values to be *invariant* across all iterations,
594  // but we only need to check expansion safety for those which *aren't*
595  // already guaranteed to dominate the guard.
596  if (!isLoopInvariantValue(GuardStart) ||
597  !isLoopInvariantValue(GuardLimit) ||
598  !isLoopInvariantValue(LatchStart) ||
599  !isLoopInvariantValue(LatchLimit)) {
600  LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
601  return None;
602  }
603  if (!Expander.isSafeToExpandAt(LatchStart, Guard) ||
604  !Expander.isSafeToExpandAt(LatchLimit, Guard)) {
605  LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
606  return None;
607  }
608 
609  // guardLimit - guardStart + latchStart - 1
610  const SCEV *RHS =
611  SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart),
612  SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
613  auto LimitCheckPred =
615 
616  LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n");
617  LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n");
618  LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n");
619 
620  auto *LimitCheck =
621  expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, RHS);
622  auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred,
623  GuardStart, GuardLimit);
624  IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
625  return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
626 }
627 
628 Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
629  LoopICmp LatchCheck, LoopICmp RangeCheck,
630  SCEVExpander &Expander, Instruction *Guard) {
631  auto *Ty = RangeCheck.IV->getType();
632  const SCEV *GuardStart = RangeCheck.IV->getStart();
633  const SCEV *GuardLimit = RangeCheck.Limit;
634  const SCEV *LatchStart = LatchCheck.IV->getStart();
635  const SCEV *LatchLimit = LatchCheck.Limit;
636  // Subtlety: We need all the values to be *invariant* across all iterations,
637  // but we only need to check expansion safety for those which *aren't*
638  // already guaranteed to dominate the guard.
639  if (!isLoopInvariantValue(GuardStart) ||
640  !isLoopInvariantValue(GuardLimit) ||
641  !isLoopInvariantValue(LatchStart) ||
642  !isLoopInvariantValue(LatchLimit)) {
643  LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
644  return None;
645  }
646  if (!Expander.isSafeToExpandAt(LatchStart, Guard) ||
647  !Expander.isSafeToExpandAt(LatchLimit, Guard)) {
648  LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
649  return None;
650  }
651  // The decrement of the latch check IV should be the same as the
652  // rangeCheckIV.
653  auto *PostDecLatchCheckIV = LatchCheck.IV->getPostIncExpr(*SE);
654  if (RangeCheck.IV != PostDecLatchCheckIV) {
655  LLVM_DEBUG(dbgs() << "Not the same. PostDecLatchCheckIV: "
656  << *PostDecLatchCheckIV
657  << " and RangeCheckIV: " << *RangeCheck.IV << "\n");
658  return None;
659  }
660 
661  // Generate the widened condition for CountDownLoop:
662  // guardStart u< guardLimit &&
663  // latchLimit <pred> 1.
664  // See the header comment for reasoning of the checks.
665  auto LimitCheckPred =
667  auto *FirstIterationCheck = expandCheck(Expander, Guard,
669  GuardStart, GuardLimit);
670  auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit,
671  SE->getOne(Ty));
672  IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
673  return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
674 }
675 
677  LoopICmp& RC) {
678  // LFTR canonicalizes checks to the ICMP_NE/EQ form; normalize back to the
679  // ULT/UGE form for ease of handling by our caller.
680  if (ICmpInst::isEquality(RC.Pred) &&
681  RC.IV->getStepRecurrence(*SE)->isOne() &&
682  SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit))
683  RC.Pred = RC.Pred == ICmpInst::ICMP_NE ?
685 }
686 
687 
688 /// If ICI can be widened to a loop invariant condition emits the loop
689 /// invariant condition in the loop preheader and return it, otherwise
690 /// returns None.
691 Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
692  SCEVExpander &Expander,
693  Instruction *Guard) {
694  LLVM_DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
695  LLVM_DEBUG(ICI->dump());
696 
697  // parseLoopStructure guarantees that the latch condition is:
698  // ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=.
699  // We are looking for the range checks of the form:
700  // i u< guardLimit
701  auto RangeCheck = parseLoopICmp(ICI);
702  if (!RangeCheck) {
703  LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
704  return None;
705  }
706  LLVM_DEBUG(dbgs() << "Guard check:\n");
707  LLVM_DEBUG(RangeCheck->dump());
708  if (RangeCheck->Pred != ICmpInst::ICMP_ULT) {
709  LLVM_DEBUG(dbgs() << "Unsupported range check predicate("
710  << RangeCheck->Pred << ")!\n");
711  return None;
712  }
713  auto *RangeCheckIV = RangeCheck->IV;
714  if (!RangeCheckIV->isAffine()) {
715  LLVM_DEBUG(dbgs() << "Range check IV is not affine!\n");
716  return None;
717  }
718  auto *Step = RangeCheckIV->getStepRecurrence(*SE);
719  // We cannot just compare with latch IV step because the latch and range IVs
720  // may have different types.
721  if (!isSupportedStep(Step)) {
722  LLVM_DEBUG(dbgs() << "Range check and latch have IVs different steps!\n");
723  return None;
724  }
725  auto *Ty = RangeCheckIV->getType();
726  auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty);
727  if (!CurrLatchCheckOpt) {
728  LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check "
729  "corresponding to range type: "
730  << *Ty << "\n");
731  return None;
732  }
733 
734  LoopICmp CurrLatchCheck = *CurrLatchCheckOpt;
735  // At this point, the range and latch step should have the same type, but need
736  // not have the same value (we support both 1 and -1 steps).
737  assert(Step->getType() ==
738  CurrLatchCheck.IV->getStepRecurrence(*SE)->getType() &&
739  "Range and latch steps should be of same type!");
740  if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) {
741  LLVM_DEBUG(dbgs() << "Range and latch have different step values!\n");
742  return None;
743  }
744 
745  if (Step->isOne())
746  return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck,
747  Expander, Guard);
748  else {
749  assert(Step->isAllOnesValue() && "Step should be -1!");
750  return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck,
751  Expander, Guard);
752  }
753 }
754 
755 unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks,
756  Value *Condition,
757  SCEVExpander &Expander,
758  Instruction *Guard) {
759  unsigned NumWidened = 0;
760  // The guard condition is expected to be in form of:
761  // cond1 && cond2 && cond3 ...
762  // Iterate over subconditions looking for icmp conditions which can be
763  // widened across loop iterations. Widening these conditions remember the
764  // resulting list of subconditions in Checks vector.
765  SmallVector<Value *, 4> Worklist(1, Condition);
766  SmallPtrSet<Value *, 4> Visited;
767  Visited.insert(Condition);
768  Value *WideableCond = nullptr;
769  do {
770  Value *Condition = Worklist.pop_back_val();
771  Value *LHS, *RHS;
772  using namespace llvm::PatternMatch;
773  if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) {
774  if (Visited.insert(LHS).second)
775  Worklist.push_back(LHS);
776  if (Visited.insert(RHS).second)
777  Worklist.push_back(RHS);
778  continue;
779  }
780 
781  if (match(Condition,
782  m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
783  // Pick any, we don't care which
784  WideableCond = Condition;
785  continue;
786  }
787 
788  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
789  if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
790  Guard)) {
791  Checks.push_back(*NewRangeCheck);
792  NumWidened++;
793  continue;
794  }
795  }
796 
797  // Save the condition as is if we can't widen it
798  Checks.push_back(Condition);
799  } while (!Worklist.empty());
800  // At the moment, our matching logic for wideable conditions implicitly
801  // assumes we preserve the form: (br (and Cond, WC())). FIXME
802  // Note that if there were multiple calls to wideable condition in the
803  // traversal, we only need to keep one, and which one is arbitrary.
804  if (WideableCond)
805  Checks.push_back(WideableCond);
806  return NumWidened;
807 }
808 
809 bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
810  SCEVExpander &Expander) {
811  LLVM_DEBUG(dbgs() << "Processing guard:\n");
812  LLVM_DEBUG(Guard->dump());
813 
814  TotalConsidered++;
816  unsigned NumWidened = collectChecks(Checks, Guard->getOperand(0), Expander,
817  Guard);
818  if (NumWidened == 0)
819  return false;
820 
821  TotalWidened += NumWidened;
822 
823  // Emit the new guard condition
824  IRBuilder<> Builder(findInsertPt(Guard, Checks));
825  Value *AllChecks = Builder.CreateAnd(Checks);
826  auto *OldCond = Guard->getOperand(0);
827  Guard->setOperand(0, AllChecks);
829  Builder.SetInsertPoint(&*++BasicBlock::iterator(Guard));
830  Builder.CreateAssumption(OldCond);
831  }
832  RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
833 
834  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
835  return true;
836 }
837 
838 bool LoopPredication::widenWidenableBranchGuardConditions(
839  BranchInst *BI, SCEVExpander &Expander) {
840  assert(isGuardAsWidenableBranch(BI) && "Must be!");
841  LLVM_DEBUG(dbgs() << "Processing guard:\n");
842  LLVM_DEBUG(BI->dump());
843 
844  Value *Cond, *WC;
845  BasicBlock *IfTrueBB, *IfFalseBB;
846  bool Parsed = parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB);
847  assert(Parsed && "Must be able to parse widenable branch");
848  (void)Parsed;
849 
850  TotalConsidered++;
852  unsigned NumWidened = collectChecks(Checks, BI->getCondition(),
853  Expander, BI);
854  if (NumWidened == 0)
855  return false;
856 
857  TotalWidened += NumWidened;
858 
859  // Emit the new guard condition
860  IRBuilder<> Builder(findInsertPt(BI, Checks));
861  Value *AllChecks = Builder.CreateAnd(Checks);
862  auto *OldCond = BI->getCondition();
863  BI->setCondition(AllChecks);
865  Builder.SetInsertPoint(IfTrueBB, IfTrueBB->getFirstInsertionPt());
866  Builder.CreateAssumption(Cond);
867  }
868  RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
870  "Stopped being a guard after transform?");
871 
872  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
873  return true;
874 }
875 
876 Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
877  using namespace PatternMatch;
878 
879  BasicBlock *LoopLatch = L->getLoopLatch();
880  if (!LoopLatch) {
881  LLVM_DEBUG(dbgs() << "The loop doesn't have a single latch!\n");
882  return None;
883  }
884 
885  auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
886  if (!BI || !BI->isConditional()) {
887  LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n");
888  return None;
889  }
890  BasicBlock *TrueDest = BI->getSuccessor(0);
891  assert(
892  (TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) &&
893  "One of the latch's destinations must be the header");
894 
895  auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
896  if (!ICI) {
897  LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n");
898  return None;
899  }
900  auto Result = parseLoopICmp(ICI);
901  if (!Result) {
902  LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
903  return None;
904  }
905 
906  if (TrueDest != L->getHeader())
908 
909  // Check affine first, so if it's not we don't try to compute the step
910  // recurrence.
911  if (!Result->IV->isAffine()) {
912  LLVM_DEBUG(dbgs() << "The induction variable is not affine!\n");
913  return None;
914  }
915 
916  auto *Step = Result->IV->getStepRecurrence(*SE);
917  if (!isSupportedStep(Step)) {
918  LLVM_DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n");
919  return None;
920  }
921 
922  auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) {
923  if (Step->isOne()) {
924  return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT &&
925  Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE;
926  } else {
927  assert(Step->isAllOnesValue() && "Step should be -1!");
928  return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT &&
929  Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE;
930  }
931  };
932 
933  normalizePredicate(SE, L, *Result);
934  if (IsUnsupportedPredicate(Step, Result->Pred)) {
935  LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
936  << ")!\n");
937  return None;
938  }
939 
940  return Result;
941 }
942 
943 
944 bool LoopPredication::isLoopProfitableToPredicate() {
946  return true;
947 
949  L->getExitEdges(ExitEdges);
950  // If there is only one exiting edge in the loop, it is always profitable to
951  // predicate the loop.
952  if (ExitEdges.size() == 1)
953  return true;
954 
955  // Calculate the exiting probabilities of all exiting edges from the loop,
956  // starting with the LatchExitProbability.
957  // Heuristic for profitability: If any of the exiting blocks' probability of
958  // exiting the loop is larger than exiting through the latch block, it's not
959  // profitable to predicate the loop.
960  auto *LatchBlock = L->getLoopLatch();
961  assert(LatchBlock && "Should have a single latch at this point!");
962  auto *LatchTerm = LatchBlock->getTerminator();
963  assert(LatchTerm->getNumSuccessors() == 2 &&
964  "expected to be an exiting block with 2 succs!");
965  unsigned LatchBrExitIdx =
966  LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0;
967  // We compute branch probabilities without BPI. We do not rely on BPI since
968  // Loop predication is usually run in an LPM and BPI is only preserved
969  // lossily within loop pass managers, while BPI has an inherent notion of
970  // being complete for an entire function.
971 
972  // If the latch exits into a deoptimize or an unreachable block, do not
973  // predicate on that latch check.
974  auto *LatchExitBlock = LatchTerm->getSuccessor(LatchBrExitIdx);
975  if (isa<UnreachableInst>(LatchTerm) ||
976  LatchExitBlock->getTerminatingDeoptimizeCall())
977  return false;
978 
979  auto IsValidProfileData = [](MDNode *ProfileData, const Instruction *Term) {
980  if (!ProfileData || !ProfileData->getOperand(0))
981  return false;
982  if (MDString *MDS = dyn_cast<MDString>(ProfileData->getOperand(0)))
983  if (!MDS->getString().equals("branch_weights"))
984  return false;
985  if (ProfileData->getNumOperands() != 1 + Term->getNumSuccessors())
986  return false;
987  return true;
988  };
989  MDNode *LatchProfileData = LatchTerm->getMetadata(LLVMContext::MD_prof);
990  // Latch terminator has no valid profile data, so nothing to check
991  // profitability on.
992  if (!IsValidProfileData(LatchProfileData, LatchTerm))
993  return true;
994 
995  auto ComputeBranchProbability =
996  [&](const BasicBlock *ExitingBlock,
997  const BasicBlock *ExitBlock) -> BranchProbability {
998  auto *Term = ExitingBlock->getTerminator();
999  MDNode *ProfileData = Term->getMetadata(LLVMContext::MD_prof);
1000  unsigned NumSucc = Term->getNumSuccessors();
1001  if (IsValidProfileData(ProfileData, Term)) {
1002  uint64_t Numerator = 0, Denominator = 0, ProfVal = 0;
1003  for (unsigned i = 0; i < NumSucc; i++) {
1004  ConstantInt *CI =
1005  mdconst::extract<ConstantInt>(ProfileData->getOperand(i + 1));
1006  ProfVal = CI->getValue().getZExtValue();
1007  if (Term->getSuccessor(i) == ExitBlock)
1008  Numerator += ProfVal;
1009  Denominator += ProfVal;
1010  }
1011  return BranchProbability::getBranchProbability(Numerator, Denominator);
1012  } else {
1013  assert(LatchBlock != ExitingBlock &&
1014  "Latch term should always have profile data!");
1015  // No profile data, so we choose the weight as 1/num_of_succ(Src)
1016  return BranchProbability::getBranchProbability(1, NumSucc);
1017  }
1018  };
1019 
1020  BranchProbability LatchExitProbability =
1021  ComputeBranchProbability(LatchBlock, LatchExitBlock);
1022 
1023  // Protect against degenerate inputs provided by the user. Providing a value
1024  // less than one, can invert the definition of profitable loop predication.
1025  float ScaleFactor = LatchExitProbabilityScale;
1026  if (ScaleFactor < 1) {
1027  LLVM_DEBUG(
1028  dbgs()
1029  << "Ignored user setting for loop-predication-latch-probability-scale: "
1030  << LatchExitProbabilityScale << "\n");
1031  LLVM_DEBUG(dbgs() << "The value is set to 1.0\n");
1032  ScaleFactor = 1.0;
1033  }
1034  const auto LatchProbabilityThreshold = LatchExitProbability * ScaleFactor;
1035 
1036  for (const auto &ExitEdge : ExitEdges) {
1037  BranchProbability ExitingBlockProbability =
1038  ComputeBranchProbability(ExitEdge.first, ExitEdge.second);
1039  // Some exiting edge has higher probability than the latch exiting edge.
1040  // No longer profitable to predicate.
1041  if (ExitingBlockProbability > LatchProbabilityThreshold)
1042  return false;
1043  }
1044 
1045  // We have concluded that the most probable way to exit from the
1046  // loop is through the latch (or there's no profile information and all
1047  // exits are equally likely).
1048  return true;
1049 }
1050 
1051 /// If we can (cheaply) find a widenable branch which controls entry into the
1052 /// loop, return it.
1054  // Walk back through any unconditional executed blocks and see if we can find
1055  // a widenable condition which seems to control execution of this loop. Note
1056  // that we predict that maythrow calls are likely untaken and thus that it's
1057  // profitable to widen a branch before a maythrow call with a condition
1058  // afterwards even though that may cause the slow path to run in a case where
1059  // it wouldn't have otherwise.
1060  BasicBlock *BB = L->getLoopPreheader();
1061  if (!BB)
1062  return nullptr;
1063  do {
1064  if (BasicBlock *Pred = BB->getSinglePredecessor())
1065  if (BB == Pred->getSingleSuccessor()) {
1066  BB = Pred;
1067  continue;
1068  }
1069  break;
1070  } while (true);
1071 
1072  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
1073  auto *Term = Pred->getTerminator();
1074 
1075  Value *Cond, *WC;
1076  BasicBlock *IfTrueBB, *IfFalseBB;
1077  if (parseWidenableBranch(Term, Cond, WC, IfTrueBB, IfFalseBB) &&
1078  IfTrueBB == BB)
1079  return cast<BranchInst>(Term);
1080  }
1081  return nullptr;
1082 }
1083 
1084 /// Return the minimum of all analyzeable exit counts. This is an upper bound
1085 /// on the actual exit count. If there are not at least two analyzeable exits,
1086 /// returns SCEVCouldNotCompute.
1088  DominatorTree &DT,
1089  Loop *L) {
1090  SmallVector<BasicBlock *, 16> ExitingBlocks;
1091  L->getExitingBlocks(ExitingBlocks);
1092 
1093  SmallVector<const SCEV *, 4> ExitCounts;
1094  for (BasicBlock *ExitingBB : ExitingBlocks) {
1095  const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
1096  if (isa<SCEVCouldNotCompute>(ExitCount))
1097  continue;
1098  assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
1099  "We should only have known counts for exiting blocks that "
1100  "dominate latch!");
1101  ExitCounts.push_back(ExitCount);
1102  }
1103  if (ExitCounts.size() < 2)
1104  return SE.getCouldNotCompute();
1105  return SE.getUMinFromMismatchedTypes(ExitCounts);
1106 }
1107 
1108 /// This implements an analogous, but entirely distinct transform from the main
1109 /// loop predication transform. This one is phrased in terms of using a
1110 /// widenable branch *outside* the loop to allow us to simplify loop exits in a
1111 /// following loop. This is close in spirit to the IndVarSimplify transform
1112 /// of the same name, but is materially different widening loosens legality
1113 /// sharply.
1114 bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1115  // The transformation performed here aims to widen a widenable condition
1116  // above the loop such that all analyzeable exit leading to deopt are dead.
1117  // It assumes that the latch is the dominant exit for profitability and that
1118  // exits branching to deoptimizing blocks are rarely taken. It relies on the
1119  // semantics of widenable expressions for legality. (i.e. being able to fall
1120  // down the widenable path spuriously allows us to ignore exit order,
1121  // unanalyzeable exits, side effects, exceptional exits, and other challenges
1122  // which restrict the applicability of the non-WC based version of this
1123  // transform in IndVarSimplify.)
1124  //
1125  // NOTE ON POISON/UNDEF - We're hoisting an expression above guards which may
1126  // imply flags on the expression being hoisted and inserting new uses (flags
1127  // are only correct for current uses). The result is that we may be
1128  // inserting a branch on the value which can be either poison or undef. In
1129  // this case, the branch can legally go either way; we just need to avoid
1130  // introducing UB. This is achieved through the use of the freeze
1131  // instruction.
1132 
1133  SmallVector<BasicBlock *, 16> ExitingBlocks;
1134  L->getExitingBlocks(ExitingBlocks);
1135 
1136  if (ExitingBlocks.empty())
1137  return false; // Nothing to do.
1138 
1139  auto *Latch = L->getLoopLatch();
1140  if (!Latch)
1141  return false;
1142 
1143  auto *WidenableBR = FindWidenableTerminatorAboveLoop(L, *LI);
1144  if (!WidenableBR)
1145  return false;
1146 
1147  const SCEV *LatchEC = SE->getExitCount(L, Latch);
1148  if (isa<SCEVCouldNotCompute>(LatchEC))
1149  return false; // profitability - want hot exit in analyzeable set
1150 
1151  // At this point, we have found an analyzeable latch, and a widenable
1152  // condition above the loop. If we have a widenable exit within the loop
1153  // (for which we can't compute exit counts), drop the ability to further
1154  // widen so that we gain ability to analyze it's exit count and perform this
1155  // transform. TODO: It'd be nice to know for sure the exit became
1156  // analyzeable after dropping widenability.
1157  bool ChangedLoop = false;
1158 
1159  for (auto *ExitingBB : ExitingBlocks) {
1160  if (LI->getLoopFor(ExitingBB) != L)
1161  continue;
1162 
1163  auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1164  if (!BI)
1165  continue;
1166 
1167  Use *Cond, *WC;
1168  BasicBlock *IfTrueBB, *IfFalseBB;
1169  if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
1170  L->contains(IfTrueBB)) {
1171  WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
1172  ChangedLoop = true;
1173  }
1174  }
1175  if (ChangedLoop)
1176  SE->forgetLoop(L);
1177 
1178  // The use of umin(all analyzeable exits) instead of latch is subtle, but
1179  // important for profitability. We may have a loop which hasn't been fully
1180  // canonicalized just yet. If the exit we chose to widen is provably never
1181  // taken, we want the widened form to *also* be provably never taken. We
1182  // can't guarantee this as a current unanalyzeable exit may later become
1183  // analyzeable, but we can at least avoid the obvious cases.
1184  const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L);
1185  if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||
1186  !SE->isLoopInvariant(MinEC, L) ||
1187  !Rewriter.isSafeToExpandAt(MinEC, WidenableBR))
1188  return ChangedLoop;
1189 
1190  // Subtlety: We need to avoid inserting additional uses of the WC. We know
1191  // that it can only have one transitive use at the moment, and thus moving
1192  // that use to just before the branch and inserting code before it and then
1193  // modifying the operand is legal.
1194  auto *IP = cast<Instruction>(WidenableBR->getCondition());
1195  // Here we unconditionally modify the IR, so after this point we should return
1196  // only `true`!
1197  IP->moveBefore(WidenableBR);
1198  if (MSSAU)
1199  if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(IP))
1200  MSSAU->moveToPlace(MUD, WidenableBR->getParent(),
1202  Rewriter.setInsertPoint(IP);
1203  IRBuilder<> B(IP);
1204 
1205  bool InvalidateLoop = false;
1206  Value *MinECV = nullptr; // lazily generated if needed
1207  for (BasicBlock *ExitingBB : ExitingBlocks) {
1208  // If our exiting block exits multiple loops, we can only rewrite the
1209  // innermost one. Otherwise, we're changing how many times the innermost
1210  // loop runs before it exits.
1211  if (LI->getLoopFor(ExitingBB) != L)
1212  continue;
1213 
1214  // Can't rewrite non-branch yet.
1215  auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1216  if (!BI)
1217  continue;
1218 
1219  // If already constant, nothing to do.
1220  if (isa<Constant>(BI->getCondition()))
1221  continue;
1222 
1223  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1224  if (isa<SCEVCouldNotCompute>(ExitCount) ||
1225  ExitCount->getType()->isPointerTy() ||
1226  !Rewriter.isSafeToExpandAt(ExitCount, WidenableBR))
1227  continue;
1228 
1229  const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1230  BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1);
1231  if (!ExitBB->getPostdominatingDeoptimizeCall())
1232  continue;
1233 
1234  /// Here we can be fairly sure that executing this exit will most likely
1235  /// lead to executing llvm.experimental.deoptimize.
1236  /// This is a profitability heuristic, not a legality constraint.
1237 
1238  // If we found a widenable exit condition, do two things:
1239  // 1) fold the widened exit test into the widenable condition
1240  // 2) fold the branch to untaken - avoids infinite looping
1241 
1242  Value *ECV = Rewriter.expandCodeFor(ExitCount);
1243  if (!MinECV)
1244  MinECV = Rewriter.expandCodeFor(MinEC);
1245  Value *RHS = MinECV;
1246  if (ECV->getType() != RHS->getType()) {
1247  Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1248  ECV = B.CreateZExt(ECV, WiderTy);
1249  RHS = B.CreateZExt(RHS, WiderTy);
1250  }
1251  assert(!Latch || DT->dominates(ExitingBB, Latch));
1252  Value *NewCond = B.CreateICmp(ICmpInst::ICMP_UGT, ECV, RHS);
1253  // Freeze poison or undef to an arbitrary bit pattern to ensure we can
1254  // branch without introducing UB. See NOTE ON POISON/UNDEF above for
1255  // context.
1256  NewCond = B.CreateFreeze(NewCond);
1257 
1258  widenWidenableBranch(WidenableBR, NewCond);
1259 
1260  Value *OldCond = BI->getCondition();
1261  BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue));
1262  InvalidateLoop = true;
1263  }
1264 
1265  if (InvalidateLoop)
1266  // We just mutated a bunch of loop exits changing there exit counts
1267  // widely. We need to force recomputation of the exit counts given these
1268  // changes. Note that all of the inserted exits are never taken, and
1269  // should be removed next time the CFG is modified.
1270  SE->forgetLoop(L);
1271 
1272  // Always return `true` since we have moved the WidenableBR's condition.
1273  return true;
1274 }
1275 
1276 bool LoopPredication::runOnLoop(Loop *Loop) {
1277  L = Loop;
1278 
1279  LLVM_DEBUG(dbgs() << "Analyzing ");
1280  LLVM_DEBUG(L->dump());
1281 
1282  Module *M = L->getHeader()->getModule();
1283 
1284  // There is nothing to do if the module doesn't use guards
1285  auto *GuardDecl =
1286  M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
1287  bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();
1288  auto *WCDecl = M->getFunction(
1289  Intrinsic::getName(Intrinsic::experimental_widenable_condition));
1290  bool HasWidenableConditions =
1291  PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty();
1292  if (!HasIntrinsicGuards && !HasWidenableConditions)
1293  return false;
1294 
1295  DL = &M->getDataLayout();
1296 
1297  Preheader = L->getLoopPreheader();
1298  if (!Preheader)
1299  return false;
1300 
1301  auto LatchCheckOpt = parseLoopLatchICmp();
1302  if (!LatchCheckOpt)
1303  return false;
1304  LatchCheck = *LatchCheckOpt;
1305 
1306  LLVM_DEBUG(dbgs() << "Latch check:\n");
1307  LLVM_DEBUG(LatchCheck.dump());
1308 
1309  if (!isLoopProfitableToPredicate()) {
1310  LLVM_DEBUG(dbgs() << "Loop not profitable to predicate!\n");
1311  return false;
1312  }
1313  // Collect all the guards into a vector and process later, so as not
1314  // to invalidate the instruction iterator.
1316  SmallVector<BranchInst *, 4> GuardsAsWidenableBranches;
1317  for (const auto BB : L->blocks()) {
1318  for (auto &I : *BB)
1319  if (isGuard(&I))
1320  Guards.push_back(cast<IntrinsicInst>(&I));
1322  isGuardAsWidenableBranch(BB->getTerminator()))
1323  GuardsAsWidenableBranches.push_back(
1324  cast<BranchInst>(BB->getTerminator()));
1325  }
1326 
1327  SCEVExpander Expander(*SE, *DL, "loop-predication");
1328  bool Changed = false;
1329  for (auto *Guard : Guards)
1330  Changed |= widenGuardConditions(Guard, Expander);
1331  for (auto *Guard : GuardsAsWidenableBranches)
1332  Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
1333  Changed |= predicateLoopExits(L, Expander);
1334 
1335  if (MSSAU && VerifyMemorySSA)
1336  MSSAU->getMemorySSA()->verifyMemorySSA();
1337  return Changed;
1338 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:519
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:850
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:237
llvm::SCEV::isAllOnesValue
bool isAllOnesValue() const
Return true if the expression is a constant all-ones value.
Definition: ScalarEvolution.cpp:447
ScalarEvolutionExpander.h
Scalar.h
MemorySSAUpdater.h
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::Value::dump
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4919
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:50
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:942
llvm::IRBuilder<>
llvm::isModSet
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:834
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::ScalarEvolution::getTruncateExpr
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1214
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:142
llvm::SCEVExpander::expandCodeFor
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
Definition: ScalarEvolutionExpander.h:278
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::CmpInst::getFlippedStrictnessPredicate
Predicate getFlippedStrictnessPredicate() const
For predicate of kind "is X or equal to 0" returns the predicate "is X".
Definition: InstrTypes.h:916
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:747
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::ICmpInst::isEquality
bool isEquality() const
Return true if this predicate is either EQ or NE.
Definition: Instructions.h:1281
Module.h
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::LoopBase::getExitEdges
void getExitEdges(SmallVectorImpl< Edge > &ExitEdges) const
Return all pairs of (inside_block,outside_block).
Definition: LoopInfoImpl.h:164
llvm::Optional
Definition: APInt.h:33
llvm::BranchProbability::getBranchProbability
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Definition: BranchProbability.cpp:53
llvm::SmallPtrSet< Value *, 4 >
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:750
getMinAnalyzeableBackedgeTakenCount
static const SCEV * getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE, DominatorTree &DT, Loop *L)
Return the minimum of all analyzeable exit counts.
Definition: LoopPredication.cpp:1087
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
llvm::LoopPredicationPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopPredication.cpp:370
llvm::SCEVExpander::isSafeToExpandAt
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint) const
Return true if the given expression is safe to expand in the sense that all materialized values are d...
Definition: ScalarEvolutionExpander.cpp:2615
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::ScalarEvolution::getUMinFromMismatchedTypes
const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
Definition: ScalarEvolution.cpp:4685
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1298
AliasAnalysis.h
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::initializeLoopPredicationLegacyPassPass
void initializeLoopPredicationLegacyPassPass(PassRegistry &)
EnableIVTruncation
static cl::opt< bool > EnableIVTruncation("loop-predication-enable-iv-truncation", cl::Hidden, cl::init(true))
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:984
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Rewriter
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:294
llvm::BranchProbabilityInfoWrapperPass
Legacy analysis pass which computes BranchProbabilityInfo.
Definition: BranchProbabilityInfo.h:438
llvm::User
Definition: User.h:44
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:746
LoopPredication
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
GuardUtils.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
IP
Definition: NVPTXLowerArgs.cpp:168
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
SkipProfitabilityChecks
static cl::opt< bool > SkipProfitabilityChecks("loop-predication-skip-profitability-checks", cl::Hidden, cl::init(false))
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:10757
llvm::Instruction
Definition: Instruction.h:42
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3220
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1486
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
LoopUtils.h
llvm::LPPassManager
Definition: LoopPass.h:76
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:147
PatternMatch.h
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::BasicBlock::getPostdominatingDeoptimizeCall
const CallInst * getPostdominatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize that is present either in current ...
Definition: BasicBlock.cpp:197
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3215
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication", "Loop predication", false, false) INITIALIZE_PASS_END(LoopPredicationLegacyPass
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1292
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:138
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
STATISTIC
STATISTIC(TotalConsidered, "Number of guards considered")
BranchProbabilityInfo.h
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
uint64_t
llvm::LoopPass
Definition: LoopPass.h:28
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:54
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
generateLoopLatchCheck
static std::optional< LoopICmp > generateLoopLatchCheck(const DataLayout &DL, ScalarEvolution &SE, const LoopICmp LatchCheck, Type *RangeCheckType)
Definition: LoopPredication.cpp:486
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1093
llvm::ScalarEvolution::getMonotonicPredicateType
Optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
Definition: ScalarEvolution.cpp:10816
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::widenWidenableBranch
void widenWidenableBranch(BranchInst *WidenableBR, Value *NewCond)
Given a branch we know is widenable (defined per Analysis/GuardUtils.h), widen it such that condition...
Definition: GuardUtils.cpp:82
llvm::CmpInst::ICMP_UGE
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:744
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
predication
loop predication
Definition: LoopPredication.cpp:363
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:934
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:749
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1108
InsertAssumesOfPredicatedGuardsConditions
static cl::opt< bool > InsertAssumesOfPredicatedGuardsConditions("loop-predication-insert-assumes-of-predicated-guards-conditions", cl::Hidden, cl::desc("Whether or not we should insert assumes of conditions of " "predicated guards"), cl::init(true))
llvm::parseWidenableBranch
bool parseWidenableBranch(const User *U, Value *&Condition, Value *&WidenableCondition, BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB)
If U is widenable branch looking like: cond = ...
Definition: GuardUtils.cpp:44
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:745
LoopPass.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::BranchProbability
Definition: BranchProbability.h:30
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::Loop::hasLoopInvariantOperands
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:66
normalizePredicate
static void normalizePredicate(ScalarEvolution *SE, Loop *L, LoopICmp &RC)
Definition: LoopPredication.cpp:676
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:827
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::isGuardAsWidenableBranch
bool isGuardAsWidenableBranch(const User *U)
Returns true iff U has semantics of a guard expressed in a form of a widenable conditional branch to ...
Definition: GuardUtils.cpp:29
llvm::MemorySSA::BeforeTerminator
@ BeforeTerminator
Definition: MemorySSA.h:788
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Function.h
llvm::SCEVUnknown
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
Definition: ScalarEvolutionExpressions.h:571
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::SCEV::isOne
bool isOne() const
Return true if the expression is a constant one.
Definition: ScalarEvolution.cpp:441
llvm::ScalarEvolution::getCouldNotCompute
const SCEV * getCouldNotCompute()
Definition: ScalarEvolution.cpp:4375
llvm::Loop::dump
void dump() const
Definition: LoopInfo.cpp:667
llvm::CmpInst::ICMP_SGE
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:748
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:52
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
isSafeToTruncateWideIVType
static bool isSafeToTruncateWideIVType(const DataLayout &DL, ScalarEvolution &SE, const LoopICmp LatchCheck, Type *RangeCheckType)
Definition: LoopPredication.cpp:451
GuardUtils.h
ScalarEvolutionExpressions.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
MemorySSA.h
llvm::createLoopPredicationPass
Pass * createLoopPredicationPass()
Definition: LoopPredication.cpp:366
PredicateWidenableBranchGuards
static cl::opt< bool > PredicateWidenableBranchGuards("loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden, cl::desc("Whether or not we should predicate guards " "expressed as widenable branches to deoptimize blocks"), cl::init(true))
llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:743
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:810
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::PatternMatch
Definition: PatternMatch.h:47
IV
static const uint32_t IV[8]
Definition: blake3_impl.h:85
llvm::SmallVectorImpl< Value * >
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:403
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:413
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3134
LoopPredication.h
FindWidenableTerminatorAboveLoop
static BranchInst * FindWidenableTerminatorAboveLoop(Loop *L, LoopInfo &LI)
If we can (cheaply) find a widenable branch which controls entry into the loop, return it.
Definition: LoopPredication.cpp:1053
llvm::MDString
A single uniqued string.
Definition: Metadata.h:612
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::ScalarEvolution::getExitCount
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
Definition: ScalarEvolution.cpp:8222
Debug.h
LatchExitProbabilityScale
static cl::opt< float > LatchExitProbabilityScale("loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0), cl::desc("scale factor for the latch probability. Value should be greater " "than 1. Lower values are ignored"))
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3213
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3227
EnableCountDownLoop
static cl::opt< bool > EnableCountDownLoop("loop-predication-enable-count-down-loop", cl::Hidden, cl::init(true))
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365