LLVM 22.0.0git
IndVarSimplify.cpp
Go to the documentation of this file.
1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This transformation analyzes and transforms the induction variables (and
10// computations derived from them) into simpler forms suitable for subsequent
11// analysis and transformation.
12//
13// If the trip count of a loop is computable, this pass also makes the following
14// changes:
15// 1. The exit condition for the loop is canonicalized to compare the
16// induction value against the exit value. This turns loops like:
17// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18// 2. Any use outside of the loop of an expression derived from the indvar
19// is changed to compute the derived value outside of the loop, eliminating
20// the dependence on the exit value of the induction variable. If the only
21// purpose of the loop is to compute the exit value of some derived
22// expression, this transformation will make the loop dead.
23//
24//===----------------------------------------------------------------------===//
25
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constant.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/Dominators.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/PassManager.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/Use.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/IR/ValueHandle.h"
67#include "llvm/Support/Debug.h"
76#include <cassert>
77#include <cstdint>
78#include <utility>
79
80using namespace llvm;
81using namespace PatternMatch;
82using namespace SCEVPatternMatch;
83
84#define DEBUG_TYPE "indvars"
85
86STATISTIC(NumWidened , "Number of indvars widened");
87STATISTIC(NumReplaced , "Number of exit values replaced");
88STATISTIC(NumLFTR , "Number of loop exit tests replaced");
89STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
90STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
91
93 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
94 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
96 clEnumValN(NeverRepl, "never", "never replace exit value"),
98 "only replace exit value when the cost is cheap"),
100 UnusedIndVarInLoop, "unusedindvarinloop",
101 "only replace exit value when it is an unused "
102 "induction variable in the loop and has cheap replacement cost"),
103 clEnumValN(NoHardUse, "noharduse",
104 "only replace exit values when loop def likely dead"),
105 clEnumValN(AlwaysRepl, "always",
106 "always replace exit value whenever possible")));
107
109 "indvars-post-increment-ranges", cl::Hidden,
110 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
111 cl::init(true));
112
113static cl::opt<bool>
114DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
115 cl::desc("Disable Linear Function Test Replace optimization"));
116
117static cl::opt<bool>
118LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
119 cl::desc("Predicate conditions in read only loops"));
120
122 "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
123 cl::desc("Predicate conditions that trap in loops with only local writes"));
124
125static cl::opt<bool>
126AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
127 cl::desc("Allow widening of indvars to eliminate s/zext"));
128
129namespace {
130
131class IndVarSimplify {
132 LoopInfo *LI;
133 ScalarEvolution *SE;
134 DominatorTree *DT;
135 const DataLayout &DL;
138 std::unique_ptr<MemorySSAUpdater> MSSAU;
139
141 bool WidenIndVars;
142
143 bool RunUnswitching = false;
144
145 bool handleFloatingPointIV(Loop *L, PHINode *PH);
146 bool rewriteNonIntegerIVs(Loop *L);
147
148 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
149 /// Try to improve our exit conditions by converting condition from signed
150 /// to unsigned or rotating computation out of the loop.
151 /// (See inline comment about why this is duplicated from simplifyAndExtend)
152 bool canonicalizeExitCondition(Loop *L);
153 /// Try to eliminate loop exits based on analyzeable exit counts
154 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
155 /// Try to form loop invariant tests for loop exits by changing how many
156 /// iterations of the loop run when that is unobservable.
157 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
158
159 bool rewriteFirstIterationLoopExitValues(Loop *L);
160
161 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
162 const SCEV *ExitCount,
163 PHINode *IndVar, SCEVExpander &Rewriter);
164
165 bool sinkUnusedInvariants(Loop *L);
166
167public:
168 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
169 const DataLayout &DL, TargetLibraryInfo *TLI,
170 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
171 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
172 WidenIndVars(WidenIndVars) {
173 if (MSSA)
174 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
175 }
176
177 bool run(Loop *L);
178
179 bool runUnswitching() const { return RunUnswitching; }
180};
181
182} // end anonymous namespace
183
184//===----------------------------------------------------------------------===//
185// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
186//===----------------------------------------------------------------------===//
187
188/// Convert APF to an integer, if possible.
189static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
190 bool isExact = false;
191 // See if we can convert this to an int64_t
192 uint64_t UIntVal;
193 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
194 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
195 !isExact)
196 return false;
197 IntVal = UIntVal;
198 return true;
199}
200
201/// Ensure we stay within the bounds of fp values that can be represented as
202/// integers without gaps, which are 2^24 and 2^53 for IEEE-754 single and
203/// double precision respectively (both on negative and positive side).
204static bool isRepresentableAsExactInteger(const APFloat &FPVal,
205 int64_t IntVal) {
206 const auto &FltSema = FPVal.getSemantics();
207 if (!APFloat::isIEEELikeFP(FltSema))
208 return false;
209 return isUIntN(APFloat::semanticsPrecision(FltSema), AbsoluteValue(IntVal));
210}
211
212/// Represents a floating-point induction variable pattern that may be
213/// convertible to integer form.
226
227/// Represents the integer values for a converted IV.
234
236 switch (FPPred) {
239 return CmpInst::ICMP_EQ;
242 return CmpInst::ICMP_NE;
245 return CmpInst::ICMP_SGT;
248 return CmpInst::ICMP_SGE;
251 return CmpInst::ICMP_SLT;
254 return CmpInst::ICMP_SLE;
255 default:
257 }
258}
259
260/// Analyze a PN to determine whether it represents a simple floating-point
261/// induction variable, with constant fp init, increment, and exit values.
262///
263/// Returns a FloatingPointIV struct if matched, std::nullopt otherwise.
264static std::optional<FloatingPointIV>
266 // Identify incoming and backedge for the PN.
267 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
268 unsigned BackEdge = IncomingEdge ^ 1;
269
270 // Check incoming value.
271 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
272 if (!InitValueVal)
273 return std::nullopt;
274
275 // Check IV increment. Reject this PN if increment operation is not
276 // an add or increment value can not be represented by an integer.
277 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
278 if (!Incr || Incr->getOpcode() != Instruction::FAdd)
279 return std::nullopt;
280
281 // If this is not an add of the PHI with a constantfp, or if the constant fp
282 // is not an integer, bail out.
283 auto *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
284 if (!IncValueVal || Incr->getOperand(0) != PN)
285 return std::nullopt;
286
287 // Check Incr uses. One user is PN and the other user is an exit condition
288 // used by the conditional terminator.
289 // TODO: Should relax this, so as to allow any `fpext` that may occur.
290 if (!Incr->hasNUses(2))
291 return std::nullopt;
292
293 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
294 // only used by a branch, we can't transform it.
295 auto It = llvm::find_if(Incr->users(),
296 [](const User *U) { return isa<FCmpInst>(U); });
297 if (It == Incr->users().end())
298 return std::nullopt;
299
300 FCmpInst *Compare = cast<FCmpInst>(*It);
301 if (!Compare->hasOneUse())
302 return std::nullopt;
303
304 // We need to verify that the branch actually controls the iteration count
305 // of the loop. If not, the new IV can overflow and no one will notice.
306 // The branch block must be in the loop and one of the successors must be out
307 // of the loop.
308 auto *BI = dyn_cast<BranchInst>(Compare->user_back());
309 if (!BI)
310 return std::nullopt;
311
312 assert(BI->isConditional() && "Can't use fcmp if not conditional");
313 if (!L->contains(BI->getParent()) ||
314 (L->contains(BI->getSuccessor(0)) && L->contains(BI->getSuccessor(1))))
315 return std::nullopt;
316
317 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
318 // transform it.
319 auto *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
320 if (!ExitValueVal)
321 return std::nullopt;
322
323 return FloatingPointIV(InitValueVal->getValueAPF(),
324 IncValueVal->getValueAPF(),
325 ExitValueVal->getValueAPF(), Compare, Incr);
326}
327
328/// Ensure that the floating-point IV can be converted to a semantics-preserving
329/// signed 32-bit integer IV.
330///
331/// Returns a IntegerIV struct if possible, std::nullopt otherwise.
332static std::optional<IntegerIV>
334 // Convert floating-point predicate to integer.
335 auto NewPred = getIntegerPredicate(FPIV.Compare->getPredicate());
336 if (NewPred == CmpInst::BAD_ICMP_PREDICATE)
337 return std::nullopt;
338
339 // Convert APFloat values to signed integers.
340 int64_t InitValue, IncrValue, ExitValue;
341 if (!ConvertToSInt(FPIV.InitValue, InitValue) ||
342 !ConvertToSInt(FPIV.IncrValue, IncrValue) ||
343 !ConvertToSInt(FPIV.ExitValue, ExitValue))
344 return std::nullopt;
345
346 // Bail out if integers cannot be represented exactly.
347 if (!isRepresentableAsExactInteger(FPIV.InitValue, InitValue) ||
349 return std::nullopt;
350
351 // We convert the floating point induction variable to a signed i32 value if
352 // we can. This is only safe if the comparison will not overflow in a way that
353 // won't be trapped by the integer equivalent operations. Check for this now.
354 // TODO: We could use i64 if it is native and the range requires it.
355
356 // The start/stride/exit values must all fit in signed i32.
357 if (!isInt<32>(InitValue) || !isInt<32>(IncrValue) || !isInt<32>(ExitValue))
358 return std::nullopt;
359
360 // If not actually striding (add x, 0.0), avoid touching the code.
361 if (IncrValue == 0)
362 return std::nullopt;
363
364 // Positive and negative strides have different safety conditions.
365 if (IncrValue > 0) {
366 // If we have a positive stride, we require the init to be less than the
367 // exit value.
368 if (InitValue >= ExitValue)
369 return std::nullopt;
370
371 uint32_t Range = uint32_t(ExitValue - InitValue);
372 // Check for infinite loop, either:
373 // while (i <= Exit) or until (i > Exit)
374 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
375 if (++Range == 0)
376 return std::nullopt; // Range overflows.
377 }
378
379 unsigned Leftover = Range % uint32_t(IncrValue);
380
381 // If this is an equality comparison, we require that the strided value
382 // exactly land on the exit value, otherwise the IV condition will wrap
383 // around and do things the fp IV wouldn't.
384 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
385 Leftover != 0)
386 return std::nullopt;
387
388 // If the stride would wrap around the i32 before exiting, we can't
389 // transform the IV.
390 if (Leftover != 0 && int32_t(ExitValue + IncrValue) < ExitValue)
391 return std::nullopt;
392 } else {
393 // If we have a negative stride, we require the init to be greater than the
394 // exit value.
395 if (InitValue <= ExitValue)
396 return std::nullopt;
397
398 uint32_t Range = uint32_t(InitValue - ExitValue);
399 // Check for infinite loop, either:
400 // while (i >= Exit) or until (i < Exit)
401 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
402 if (++Range == 0)
403 return std::nullopt; // Range overflows.
404 }
405
406 unsigned Leftover = Range % uint32_t(-IncrValue);
407
408 // If this is an equality comparison, we require that the strided value
409 // exactly land on the exit value, otherwise the IV condition will wrap
410 // around and do things the fp IV wouldn't.
411 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
412 Leftover != 0)
413 return std::nullopt;
414
415 // If the stride would wrap around the i32 before exiting, we can't
416 // transform the IV.
417 if (Leftover != 0 && int32_t(ExitValue + IncrValue) > ExitValue)
418 return std::nullopt;
419 }
420
421 return IntegerIV{InitValue, IncrValue, ExitValue, NewPred};
422}
423
424/// Rewrite the floating-point IV as an integer IV.
426 const FloatingPointIV &FPIV,
427 const IntegerIV &IIV,
428 const TargetLibraryInfo *TLI,
429 std::unique_ptr<MemorySSAUpdater> &MSSAU) {
430 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
431 unsigned BackEdge = IncomingEdge ^ 1;
432
434 auto *Incr = cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
435 auto *BI = cast<BranchInst>(FPIV.Compare->user_back());
436
437 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting floating-point IV to integer IV:\n"
438 << " Init: " << IIV.InitValue << "\n"
439 << " Incr: " << IIV.IncrValue << "\n"
440 << " Exit: " << IIV.ExitValue << "\n"
441 << " Pred: " << CmpInst::getPredicateName(IIV.NewPred)
442 << "\n"
443 << " Original PN: " << *PN << "\n");
444
445 // Insert new integer induction variable.
446 PHINode *NewPHI =
447 PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
449 PN->getIncomingBlock(IncomingEdge));
450 NewPHI->setDebugLoc(PN->getDebugLoc());
451
452 Instruction *NewAdd = BinaryOperator::CreateAdd(
454 Incr->getName() + ".int", Incr->getIterator());
455 NewAdd->setDebugLoc(Incr->getDebugLoc());
456 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
457
458 ICmpInst *NewCompare = new ICmpInst(
459 BI->getIterator(), IIV.NewPred, NewAdd,
461 NewCompare->setDebugLoc(FPIV.Compare->getDebugLoc());
462
463 // In the following deletions, PN may become dead and may be deleted.
464 // Use a WeakTrackingVH to observe whether this happens.
465 WeakTrackingVH WeakPH = PN;
466
467 // Delete the old floating point exit comparison. The branch starts using the
468 // new comparison.
469 NewCompare->takeName(FPIV.Compare);
470 FPIV.Compare->replaceAllUsesWith(NewCompare);
472
473 // Delete the old floating point increment.
474 Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
475 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
476
477 // If the FP induction variable still has uses, this is because something else
478 // in the loop uses its value. In order to canonicalize the induction
479 // variable, we chose to eliminate the IV and rewrite it in terms of an
480 // int->fp cast.
481 //
482 // We give preference to sitofp over uitofp because it is faster on most
483 // platforms.
484 if (WeakPH) {
485 Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
486 PN->getParent()->getFirstInsertionPt());
487 Conv->setDebugLoc(PN->getDebugLoc());
488 PN->replaceAllUsesWith(Conv);
489 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
490 }
491}
492
493/// If the loop has a floating induction variable, then insert corresponding
494/// integer induction variable if possible. For example, the following:
495/// for(double i = 0; i < 10000; ++i)
496/// bar(i)
497/// is converted into
498/// for(int i = 0; i < 10000; ++i)
499/// bar((double)i);
500bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
501 // See if the PN matches a floating-point IV pattern.
502 auto FPIV = maybeFloatingPointRecurrence(L, PN);
503 if (!FPIV)
504 return false;
505
506 // Can we safely convert the floating-point values to integer ones?
507 auto IIV = tryConvertToIntegerIV(*FPIV);
508 if (!IIV)
509 return false;
510
511 // Perform the rewriting.
512 canonicalizeToIntegerIV(L, PN, *FPIV, *IIV, TLI, MSSAU);
513 return true;
514}
515
516bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
517 // First step. Check to see if there are any floating-point recurrences.
518 // If there are, change them into integer recurrences, permitting analysis by
519 // the SCEV routines.
520 BasicBlock *Header = L->getHeader();
521
523
524 bool Changed = false;
525 for (WeakTrackingVH &PHI : PHIs)
526 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
527 Changed |= handleFloatingPointIV(L, PN);
528
529 // If the loop previously had floating-point IV, ScalarEvolution
530 // may not have been able to compute a trip count. Now that we've done some
531 // re-writing, the trip count may be computable.
532 if (Changed)
533 SE->forgetLoop(L);
534 return Changed;
535}
536
537//===---------------------------------------------------------------------===//
538// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
539// they will exit at the first iteration.
540//===---------------------------------------------------------------------===//
541
542/// Check to see if this loop has loop invariant conditions which lead to loop
543/// exits. If so, we know that if the exit path is taken, it is at the first
544/// loop iteration. This lets us predict exit values of PHI nodes that live in
545/// loop header.
546bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
547 // Verify the input to the pass is already in LCSSA form.
548 assert(L->isLCSSAForm(*DT));
549
550 SmallVector<BasicBlock *, 8> ExitBlocks;
551 L->getUniqueExitBlocks(ExitBlocks);
552
553 bool MadeAnyChanges = false;
554 for (auto *ExitBB : ExitBlocks) {
555 // If there are no more PHI nodes in this exit block, then no more
556 // values defined inside the loop are used on this path.
557 for (PHINode &PN : ExitBB->phis()) {
558 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
559 IncomingValIdx != E; ++IncomingValIdx) {
560 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
561
562 // Can we prove that the exit must run on the first iteration if it
563 // runs at all? (i.e. early exits are fine for our purposes, but
564 // traces which lead to this exit being taken on the 2nd iteration
565 // aren't.) Note that this is about whether the exit branch is
566 // executed, not about whether it is taken.
567 if (!L->getLoopLatch() ||
568 !DT->dominates(IncomingBB, L->getLoopLatch()))
569 continue;
570
571 // Get condition that leads to the exit path.
572 auto *TermInst = IncomingBB->getTerminator();
573
574 Value *Cond = nullptr;
575 if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
576 // Must be a conditional branch, otherwise the block
577 // should not be in the loop.
578 Cond = BI->getCondition();
579 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
580 Cond = SI->getCondition();
581 else
582 continue;
583
584 if (!L->isLoopInvariant(Cond))
585 continue;
586
587 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
588
589 // Only deal with PHIs in the loop header.
590 if (!ExitVal || ExitVal->getParent() != L->getHeader())
591 continue;
592
593 // If ExitVal is a PHI on the loop header, then we know its
594 // value along this exit because the exit can only be taken
595 // on the first iteration.
596 auto *LoopPreheader = L->getLoopPreheader();
597 assert(LoopPreheader && "Invalid loop");
598 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
599 if (PreheaderIdx != -1) {
600 assert(ExitVal->getParent() == L->getHeader() &&
601 "ExitVal must be in loop header");
602 MadeAnyChanges = true;
603 PN.setIncomingValue(IncomingValIdx,
604 ExitVal->getIncomingValue(PreheaderIdx));
605 SE->forgetValue(&PN);
606 }
607 }
608 }
609 }
610 return MadeAnyChanges;
611}
612
613//===----------------------------------------------------------------------===//
614// IV Widening - Extend the width of an IV to cover its widest uses.
615//===----------------------------------------------------------------------===//
616
617/// Update information about the induction variable that is extended by this
618/// sign or zero extend operation. This is used to determine the final width of
619/// the IV before actually widening it.
620static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
621 ScalarEvolution *SE,
622 const TargetTransformInfo *TTI) {
623 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
624 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
625 return;
626
627 Type *Ty = Cast->getType();
628 uint64_t Width = SE->getTypeSizeInBits(Ty);
629 if (!Cast->getDataLayout().isLegalInteger(Width))
630 return;
631
632 // Check that `Cast` actually extends the induction variable (we rely on this
633 // later). This takes care of cases where `Cast` is extending a truncation of
634 // the narrow induction variable, and thus can end up being narrower than the
635 // "narrow" induction variable.
636 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
637 if (NarrowIVWidth >= Width)
638 return;
639
640 // Cast is either an sext or zext up to this point.
641 // We should not widen an indvar if arithmetics on the wider indvar are more
642 // expensive than those on the narrower indvar. We check only the cost of ADD
643 // because at least an ADD is required to increment the induction variable. We
644 // could compute more comprehensively the cost of all instructions on the
645 // induction variable when necessary.
646 if (TTI &&
647 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
648 TTI->getArithmeticInstrCost(Instruction::Add,
649 Cast->getOperand(0)->getType())) {
650 return;
651 }
652
653 if (!WI.WidestNativeType ||
654 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
656 WI.IsSigned = IsSigned;
657 return;
658 }
659
660 // We extend the IV to satisfy the sign of its user(s), or 'signed'
661 // if there are multiple users with both sign- and zero extensions,
662 // in order not to introduce nondeterministic behaviour based on the
663 // unspecified order of a PHI nodes' users-iterator.
664 WI.IsSigned |= IsSigned;
665}
666
667//===----------------------------------------------------------------------===//
668// Live IV Reduction - Minimize IVs live across the loop.
669//===----------------------------------------------------------------------===//
670
671//===----------------------------------------------------------------------===//
672// Simplification of IV users based on SCEV evaluation.
673//===----------------------------------------------------------------------===//
674
675namespace {
676
677class IndVarSimplifyVisitor : public IVVisitor {
678 ScalarEvolution *SE;
679 const TargetTransformInfo *TTI;
680 PHINode *IVPhi;
681
682public:
683 WideIVInfo WI;
684
685 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
686 const TargetTransformInfo *TTI,
687 const DominatorTree *DTree)
688 : SE(SCEV), TTI(TTI), IVPhi(IV) {
689 DT = DTree;
690 WI.NarrowIV = IVPhi;
691 }
692
693 // Implement the interface used by simplifyUsersOfIV.
694 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
695};
696
697} // end anonymous namespace
698
699/// Iteratively perform simplification on a worklist of IV users. Each
700/// successive simplification may push more users which may themselves be
701/// candidates for simplification.
702///
703/// Sign/Zero extend elimination is interleaved with IV simplification.
704bool IndVarSimplify::simplifyAndExtend(Loop *L,
705 SCEVExpander &Rewriter,
706 LoopInfo *LI) {
708
709 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
710 L->getBlocks()[0]->getModule(), Intrinsic::experimental_guard);
711 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
712
714 llvm::make_pointer_range(L->getHeader()->phis()));
715
716 // Each round of simplification iterates through the SimplifyIVUsers worklist
717 // for all current phis, then determines whether any IVs can be
718 // widened. Widening adds new phis to LoopPhis, inducing another round of
719 // simplification on the wide IVs.
720 bool Changed = false;
721 while (!LoopPhis.empty()) {
722 // Evaluate as many IV expressions as possible before widening any IVs. This
723 // forces SCEV to set no-wrap flags before evaluating sign/zero
724 // extension. The first time SCEV attempts to normalize sign/zero extension,
725 // the result becomes final. So for the most predictable results, we delay
726 // evaluation of sign/zero extend evaluation until needed, and avoid running
727 // other SCEV based analysis prior to simplifyAndExtend.
728 do {
729 PHINode *CurrIV = LoopPhis.pop_back_val();
730
731 // Information about sign/zero extensions of CurrIV.
732 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
733
734 const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
735 Rewriter, &Visitor);
736
737 Changed |= C;
738 RunUnswitching |= U;
739 if (Visitor.WI.WidestNativeType) {
740 WideIVs.push_back(Visitor.WI);
741 }
742 } while(!LoopPhis.empty());
743
744 // Continue if we disallowed widening.
745 if (!WidenIndVars)
746 continue;
747
748 for (; !WideIVs.empty(); WideIVs.pop_back()) {
749 unsigned ElimExt;
750 unsigned Widened;
751 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
752 DT, DeadInsts, ElimExt, Widened,
753 HasGuards, UsePostIncrementRanges)) {
754 NumElimExt += ElimExt;
755 NumWidened += Widened;
756 Changed = true;
757 LoopPhis.push_back(WidePhi);
758 }
759 }
760 }
761 return Changed;
762}
763
764//===----------------------------------------------------------------------===//
765// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
766//===----------------------------------------------------------------------===//
767
768/// Given an Value which is hoped to be part of an add recurance in the given
769/// loop, return the associated Phi node if so. Otherwise, return null. Note
770/// that this is less general than SCEVs AddRec checking.
773 if (!IncI)
774 return nullptr;
775
776 switch (IncI->getOpcode()) {
777 case Instruction::Add:
778 case Instruction::Sub:
779 break;
780 case Instruction::GetElementPtr:
781 // An IV counter must preserve its type.
782 if (IncI->getNumOperands() == 2)
783 break;
784 [[fallthrough]];
785 default:
786 return nullptr;
787 }
788
789 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
790 if (Phi && Phi->getParent() == L->getHeader()) {
791 if (L->isLoopInvariant(IncI->getOperand(1)))
792 return Phi;
793 return nullptr;
794 }
795 if (IncI->getOpcode() == Instruction::GetElementPtr)
796 return nullptr;
797
798 // Allow add/sub to be commuted.
799 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
800 if (Phi && Phi->getParent() == L->getHeader()) {
801 if (L->isLoopInvariant(IncI->getOperand(0)))
802 return Phi;
803 }
804 return nullptr;
805}
806
807/// Whether the current loop exit test is based on this value. Currently this
808/// is limited to a direct use in the loop condition.
809static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
810 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
812 // TODO: Allow non-icmp loop test.
813 if (!ICmp)
814 return false;
815
816 // TODO: Allow indirect use.
817 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
818}
819
820/// linearFunctionTestReplace policy. Return true unless we can show that the
821/// current exit test is already sufficiently canonical.
822static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
823 assert(L->getLoopLatch() && "Must be in simplified form");
824
825 // Avoid converting a constant or loop invariant test back to a runtime
826 // test. This is critical for when SCEV's cached ExitCount is less precise
827 // than the current IR (such as after we've proven a particular exit is
828 // actually dead and thus the BE count never reaches our ExitCount.)
829 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
830 if (L->isLoopInvariant(BI->getCondition()))
831 return false;
832
833 // Do LFTR to simplify the exit condition to an ICMP.
835 if (!Cond)
836 return true;
837
838 // Do LFTR to simplify the exit ICMP to EQ/NE
839 ICmpInst::Predicate Pred = Cond->getPredicate();
840 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
841 return true;
842
843 // Look for a loop invariant RHS
844 Value *LHS = Cond->getOperand(0);
845 Value *RHS = Cond->getOperand(1);
846 if (!L->isLoopInvariant(RHS)) {
847 if (!L->isLoopInvariant(LHS))
848 return true;
849 std::swap(LHS, RHS);
850 }
851 // Look for a simple IV counter LHS
853 if (!Phi)
854 Phi = getLoopPhiForCounter(LHS, L);
855
856 if (!Phi)
857 return true;
858
859 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
860 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
861 if (Idx < 0)
862 return true;
863
864 // Do LFTR if the exit condition's IV is *not* a simple counter.
865 Value *IncV = Phi->getIncomingValue(Idx);
866 return Phi != getLoopPhiForCounter(IncV, L);
867}
868
869/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
870/// down to checking that all operands are constant and listing instructions
871/// that may hide undef.
873 unsigned Depth) {
874 if (isa<Constant>(V))
875 return !isa<UndefValue>(V);
876
877 if (Depth >= 6)
878 return false;
879
880 // Conservatively handle non-constant non-instructions. For example, Arguments
881 // may be undef.
883 if (!I)
884 return false;
885
886 // Load and return values may be undef.
887 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
888 return false;
889
890 // Optimistically handle other instructions.
891 for (Value *Op : I->operands()) {
892 if (!Visited.insert(Op).second)
893 continue;
894 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
895 return false;
896 }
897 return true;
898}
899
900/// Return true if the given value is concrete. We must prove that undef can
901/// never reach it.
902///
903/// TODO: If we decide that this is a good approach to checking for undef, we
904/// may factor it into a common location.
905static bool hasConcreteDef(Value *V) {
907 Visited.insert(V);
908 return hasConcreteDefImpl(V, Visited, 0);
909}
910
911/// Return true if the given phi is a "counter" in L. A counter is an
912/// add recurance (of integer or pointer type) with an arbitrary start, and a
913/// step of 1. Note that L must have exactly one latch.
914static bool isLoopCounter(PHINode* Phi, Loop *L,
915 ScalarEvolution *SE) {
916 assert(Phi->getParent() == L->getHeader());
917 assert(L->getLoopLatch());
918
919 if (!SE->isSCEVable(Phi->getType()))
920 return false;
921
922 const SCEV *S = SE->getSCEV(Phi);
924 return false;
925
926 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
927 Value *IncV = Phi->getIncomingValue(LatchIdx);
928 return (getLoopPhiForCounter(IncV, L) == Phi &&
929 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
930}
931
932/// Search the loop header for a loop counter (anadd rec w/step of one)
933/// suitable for use by LFTR. If multiple counters are available, select the
934/// "best" one based profitable heuristics.
935///
936/// BECount may be an i8* pointer type. The pointer difference is already
937/// valid count without scaling the address stride, so it remains a pointer
938/// expression as far as SCEV is concerned.
939static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
940 const SCEV *BECount,
942 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
943
944 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
945
946 // Loop over all of the PHI nodes, looking for a simple counter.
947 PHINode *BestPhi = nullptr;
948 const SCEV *BestInit = nullptr;
949 BasicBlock *LatchBlock = L->getLoopLatch();
950 assert(LatchBlock && "Must be in simplified form");
951 const DataLayout &DL = L->getHeader()->getDataLayout();
952
953 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
954 PHINode *Phi = cast<PHINode>(I);
955 if (!isLoopCounter(Phi, L, SE))
956 continue;
957
958 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
959
960 // AR may be a pointer type, while BECount is an integer type.
961 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
962 // AR may not be a narrower type, or we may never exit.
963 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
964 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
965 continue;
966
967 // Avoid reusing a potentially undef value to compute other values that may
968 // have originally had a concrete definition.
969 if (!hasConcreteDef(Phi)) {
970 // We explicitly allow unknown phis as long as they are already used by
971 // the loop exit test. This is legal since performing LFTR could not
972 // increase the number of undef users.
973 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
974 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
975 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
976 continue;
977 }
978
979 // Avoid introducing undefined behavior due to poison which didn't exist in
980 // the original program. (Annoyingly, the rules for poison and undef
981 // propagation are distinct, so this does NOT cover the undef case above.)
982 // We have to ensure that we don't introduce UB by introducing a use on an
983 // iteration where said IV produces poison. Our strategy here differs for
984 // pointers and integer IVs. For integers, we strip and reinfer as needed,
985 // see code in linearFunctionTestReplace. For pointers, we restrict
986 // transforms as there is no good way to reinfer inbounds once lost.
987 if (!Phi->getType()->isIntegerTy() &&
988 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
989 continue;
990
991 const SCEV *Init = AR->getStart();
992
993 if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
994 // Don't force a live loop counter if another IV can be used.
995 if (isAlmostDeadIV(Phi, LatchBlock, Cond))
996 continue;
997
998 // Prefer to count-from-zero. This is a more "canonical" counter form. It
999 // also prefers integer to pointer IVs.
1000 if (BestInit->isZero() != Init->isZero()) {
1001 if (BestInit->isZero())
1002 continue;
1003 }
1004 // If two IVs both count from zero or both count from nonzero then the
1005 // narrower is likely a dead phi that has been widened. Use the wider phi
1006 // to allow the other to be eliminated.
1007 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1008 continue;
1009 }
1010 BestPhi = Phi;
1011 BestInit = Init;
1012 }
1013 return BestPhi;
1014}
1015
1016/// Insert an IR expression which computes the value held by the IV IndVar
1017/// (which must be an loop counter w/unit stride) after the backedge of loop L
1018/// is taken ExitCount times.
1019static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
1020 const SCEV *ExitCount, bool UsePostInc, Loop *L,
1021 SCEVExpander &Rewriter, ScalarEvolution *SE) {
1022 assert(isLoopCounter(IndVar, L, SE));
1023 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
1024 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
1025 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
1026
1027 // For integer IVs, truncate the IV before computing the limit unless we
1028 // know apriori that the limit must be a constant when evaluated in the
1029 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
1030 // IV in the loop over a (potentially) expensive expansion of the widened
1031 // exit count add(zext(add)) expression.
1032 if (IndVar->getType()->isIntegerTy() &&
1033 SE->getTypeSizeInBits(AR->getType()) >
1034 SE->getTypeSizeInBits(ExitCount->getType())) {
1035 const SCEV *IVInit = AR->getStart();
1036 if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
1037 AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
1038 }
1039
1040 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
1041 const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
1042 assert(SE->isLoopInvariant(IVLimit, L) &&
1043 "Computed iteration count is not loop invariant!");
1044 return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
1045 ExitingBB->getTerminator());
1046}
1047
1048/// This method rewrites the exit condition of the loop to be a canonical !=
1049/// comparison against the incremented loop induction variable. This pass is
1050/// able to rewrite the exit tests of any loop where the SCEV analysis can
1051/// determine a loop-invariant trip count of the loop, which is actually a much
1052/// broader range than just linear tests.
1053bool IndVarSimplify::
1054linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
1055 const SCEV *ExitCount,
1056 PHINode *IndVar, SCEVExpander &Rewriter) {
1057 assert(L->getLoopLatch() && "Loop no longer in simplified form?");
1058 assert(isLoopCounter(IndVar, L, SE));
1059 Instruction * const IncVar =
1060 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
1061
1062 // Initialize CmpIndVar to the preincremented IV.
1063 Value *CmpIndVar = IndVar;
1064 bool UsePostInc = false;
1065
1066 // If the exiting block is the same as the backedge block, we prefer to
1067 // compare against the post-incremented value, otherwise we must compare
1068 // against the preincremented value.
1069 if (ExitingBB == L->getLoopLatch()) {
1070 // For pointer IVs, we chose to not strip inbounds which requires us not
1071 // to add a potentially UB introducing use. We need to either a) show
1072 // the loop test we're modifying is already in post-inc form, or b) show
1073 // that adding a use must not introduce UB.
1074 bool SafeToPostInc =
1075 IndVar->getType()->isIntegerTy() ||
1076 isLoopExitTestBasedOn(IncVar, ExitingBB) ||
1077 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
1078 if (SafeToPostInc) {
1079 UsePostInc = true;
1080 CmpIndVar = IncVar;
1081 }
1082 }
1083
1084 // It may be necessary to drop nowrap flags on the incrementing instruction
1085 // if either LFTR moves from a pre-inc check to a post-inc check (in which
1086 // case the increment might have previously been poison on the last iteration
1087 // only) or if LFTR switches to a different IV that was previously dynamically
1088 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
1089 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
1090 // check), because the pre-inc addrec flags may be adopted from the original
1091 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
1092 // TODO: This handling is inaccurate for one case: If we switch to a
1093 // dynamically dead IV that wraps on the first loop iteration only, which is
1094 // not covered by the post-inc addrec. (If the new IV was not dynamically
1095 // dead, it could not be poison on the first iteration in the first place.)
1096 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
1097 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
1098 if (BO->hasNoUnsignedWrap())
1099 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
1100 if (BO->hasNoSignedWrap())
1101 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
1102 }
1103
1104 Value *ExitCnt = genLoopLimit(
1105 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
1106 assert(ExitCnt->getType()->isPointerTy() ==
1107 IndVar->getType()->isPointerTy() &&
1108 "genLoopLimit missed a cast");
1109
1110 // Insert a new icmp_ne or icmp_eq instruction before the branch.
1111 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1112 ICmpInst::Predicate P;
1113 if (L->contains(BI->getSuccessor(0)))
1114 P = ICmpInst::ICMP_NE;
1115 else
1116 P = ICmpInst::ICMP_EQ;
1117
1118 IRBuilder<> Builder(BI);
1119
1120 // The new loop exit condition should reuse the debug location of the
1121 // original loop exit condition.
1122 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1123 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1124
1125 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1126 // avoid the expensive expansion of the limit expression in the wider type,
1127 // emit a truncate to narrow the IV to the ExitCount type. This is safe
1128 // since we know (from the exit count bitwidth), that we can't self-wrap in
1129 // the narrower type.
1130 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1131 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1132 if (CmpIndVarSize > ExitCntSize) {
1133 assert(!CmpIndVar->getType()->isPointerTy() &&
1134 !ExitCnt->getType()->isPointerTy());
1135
1136 // Before resorting to actually inserting the truncate, use the same
1137 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1138 // the other side of the comparison instead. We still evaluate the limit
1139 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1140 // a truncate within in.
1141 bool Extended = false;
1142 const SCEV *IV = SE->getSCEV(CmpIndVar);
1143 const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
1144 const SCEV *ZExtTrunc =
1145 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1146
1147 if (ZExtTrunc == IV) {
1148 Extended = true;
1149 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1150 "wide.trip.count");
1151 } else {
1152 const SCEV *SExtTrunc =
1153 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1154 if (SExtTrunc == IV) {
1155 Extended = true;
1156 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1157 "wide.trip.count");
1158 }
1159 }
1160
1161 if (Extended) {
1162 bool Discard;
1163 L->makeLoopInvariant(ExitCnt, Discard);
1164 } else
1165 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1166 "lftr.wideiv");
1167 }
1168 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1169 << " LHS:" << *CmpIndVar << '\n'
1170 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1171 << "\n"
1172 << " RHS:\t" << *ExitCnt << "\n"
1173 << "ExitCount:\t" << *ExitCount << "\n"
1174 << " was: " << *BI->getCondition() << "\n");
1175
1176 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1177 Value *OrigCond = BI->getCondition();
1178 // It's tempting to use replaceAllUsesWith here to fully replace the old
1179 // comparison, but that's not immediately safe, since users of the old
1180 // comparison may not be dominated by the new comparison. Instead, just
1181 // update the branch to use the new comparison; in the common case this
1182 // will make old comparison dead.
1183 BI->setCondition(Cond);
1184 DeadInsts.emplace_back(OrigCond);
1185
1186 ++NumLFTR;
1187 return true;
1188}
1189
1190//===----------------------------------------------------------------------===//
1191// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1192//===----------------------------------------------------------------------===//
1193
1194/// If there's a single exit block, sink any loop-invariant values that
1195/// were defined in the preheader but not used inside the loop into the
1196/// exit block to reduce register pressure in the loop.
1197bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1198 BasicBlock *ExitBlock = L->getExitBlock();
1199 if (!ExitBlock) return false;
1200
1201 BasicBlock *Preheader = L->getLoopPreheader();
1202 if (!Preheader) return false;
1203
1204 bool MadeAnyChanges = false;
1205 for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
1206
1207 // Skip BB Terminator.
1208 if (Preheader->getTerminator() == &I)
1209 continue;
1210
1211 // New instructions were inserted at the end of the preheader.
1212 if (isa<PHINode>(I))
1213 break;
1214
1215 // Don't move instructions which might have side effects, since the side
1216 // effects need to complete before instructions inside the loop. Also don't
1217 // move instructions which might read memory, since the loop may modify
1218 // memory. Note that it's okay if the instruction might have undefined
1219 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1220 // block.
1221 if (I.mayHaveSideEffects() || I.mayReadFromMemory())
1222 continue;
1223
1224 // Skip debug or pseudo instructions.
1225 if (I.isDebugOrPseudoInst())
1226 continue;
1227
1228 // Skip eh pad instructions.
1229 if (I.isEHPad())
1230 continue;
1231
1232 // Don't sink alloca: we never want to sink static alloca's out of the
1233 // entry block, and correctly sinking dynamic alloca's requires
1234 // checks for stacksave/stackrestore intrinsics.
1235 // FIXME: Refactor this check somehow?
1236 if (isa<AllocaInst>(&I))
1237 continue;
1238
1239 // Determine if there is a use in or before the loop (direct or
1240 // otherwise).
1241 bool UsedInLoop = false;
1242 for (Use &U : I.uses()) {
1243 Instruction *User = cast<Instruction>(U.getUser());
1244 BasicBlock *UseBB = User->getParent();
1245 if (PHINode *P = dyn_cast<PHINode>(User)) {
1246 unsigned i =
1248 UseBB = P->getIncomingBlock(i);
1249 }
1250 if (UseBB == Preheader || L->contains(UseBB)) {
1251 UsedInLoop = true;
1252 break;
1253 }
1254 }
1255
1256 // If there is, the def must remain in the preheader.
1257 if (UsedInLoop)
1258 continue;
1259
1260 // Otherwise, sink it to the exit block.
1261 I.moveBefore(ExitBlock->getFirstInsertionPt());
1262 SE->forgetValue(&I);
1263 MadeAnyChanges = true;
1264 }
1265
1266 return MadeAnyChanges;
1267}
1268
1269static void replaceExitCond(BranchInst *BI, Value *NewCond,
1271 auto *OldCond = BI->getCondition();
1272 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1273 << " with " << *NewCond << "\n");
1274 BI->setCondition(NewCond);
1275 if (OldCond->use_empty())
1276 DeadInsts.emplace_back(OldCond);
1277}
1278
1279static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1280 bool IsTaken) {
1281 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1282 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1283 auto *OldCond = BI->getCondition();
1284 return ConstantInt::get(OldCond->getType(),
1285 IsTaken ? ExitIfTrue : !ExitIfTrue);
1286}
1287
1288static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1290 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1291 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1292 replaceExitCond(BI, NewCond, DeadInsts);
1293}
1294
1296 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1297 ScalarEvolution &SE) {
1298 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1299 auto *LoopPreheader = L->getLoopPreheader();
1300 auto *LoopHeader = L->getHeader();
1302 for (auto &PN : LoopHeader->phis()) {
1303 auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1304 for (User *U : PN.users())
1305 Worklist.push_back(cast<Instruction>(U));
1306 SE.forgetValue(&PN);
1307 PN.replaceAllUsesWith(PreheaderIncoming);
1308 DeadInsts.emplace_back(&PN);
1309 }
1310
1311 // Replacing with the preheader value will often allow IV users to simplify
1312 // (especially if the preheader value is a constant).
1314 while (!Worklist.empty()) {
1315 auto *I = cast<Instruction>(Worklist.pop_back_val());
1316 if (!Visited.insert(I).second)
1317 continue;
1318
1319 // Don't simplify instructions outside the loop.
1320 if (!L->contains(I))
1321 continue;
1322
1323 Value *Res = simplifyInstruction(I, I->getDataLayout());
1324 if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1325 for (User *U : I->users())
1326 Worklist.push_back(cast<Instruction>(U));
1327 I->replaceAllUsesWith(Res);
1328 DeadInsts.emplace_back(I);
1329 }
1330 }
1331}
1332
1333static Value *
1336 SCEVExpander &Rewriter) {
1337 ICmpInst::Predicate InvariantPred = LIP.Pred;
1338 BasicBlock *Preheader = L->getLoopPreheader();
1339 assert(Preheader && "Preheader doesn't exist");
1340 Rewriter.setInsertPoint(Preheader->getTerminator());
1341 auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1342 auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1343 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1344 if (ExitIfTrue)
1345 InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1346 IRBuilder<> Builder(Preheader->getTerminator());
1347 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1348 return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1349 BI->getCondition()->getName());
1350}
1351
1352static std::optional<Value *>
1353createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1354 const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1355 ScalarEvolution *SE, SCEVExpander &Rewriter) {
1356 CmpPredicate Pred = ICmp->getCmpPredicate();
1357 Value *LHS = ICmp->getOperand(0);
1358 Value *RHS = ICmp->getOperand(1);
1359
1360 // 'LHS pred RHS' should now mean that we stay in loop.
1361 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1362 if (Inverted)
1364
1365 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1366 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1367 // Can we prove it to be trivially true or false?
1368 if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1369 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1370
1371 auto *ARTy = LHSS->getType();
1372 auto *MaxIterTy = MaxIter->getType();
1373 // If possible, adjust types.
1374 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1375 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1376 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1377 const SCEV *MinusOne = SE->getMinusOne(ARTy);
1378 const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1379 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1380 MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1381 }
1382
1383 if (SkipLastIter) {
1384 // Semantically skip last iter is "subtract 1, do not bother about unsigned
1385 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1386 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1387 // So we manually construct umin(a - 1, b - 1).
1389 if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1390 for (const SCEV *Op : UMin->operands())
1391 Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1392 MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1393 } else
1394 MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1395 }
1396
1397 // Check if there is a loop-invariant predicate equivalent to our check.
1398 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1399 L, BI, MaxIter);
1400 if (!LIP)
1401 return std::nullopt;
1402
1403 // Can we prove it to be trivially true?
1404 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1405 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1406 else
1407 return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1408}
1409
1411 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1412 bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1414 assert(
1415 (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1416 "Not a loop exit!");
1417
1418 // For branch that stays in loop by TRUE condition, go through AND. For branch
1419 // that stays in loop by FALSE condition, go through OR. Both gives the
1420 // similar logic: "stay in loop iff all conditions are true(false)".
1421 bool Inverted = L->contains(BI->getSuccessor(1));
1422 SmallVector<ICmpInst *, 4> LeafConditions;
1423 SmallVector<Value *, 4> Worklist;
1425 Value *OldCond = BI->getCondition();
1426 Visited.insert(OldCond);
1427 Worklist.push_back(OldCond);
1428
1429 auto GoThrough = [&](Value *V) {
1430 Value *LHS = nullptr, *RHS = nullptr;
1431 if (Inverted) {
1432 if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1433 return false;
1434 } else {
1435 if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1436 return false;
1437 }
1438 if (Visited.insert(LHS).second)
1439 Worklist.push_back(LHS);
1440 if (Visited.insert(RHS).second)
1441 Worklist.push_back(RHS);
1442 return true;
1443 };
1444
1445 do {
1446 Value *Curr = Worklist.pop_back_val();
1447 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1448 // those with one use, to avoid instruction duplication.
1449 if (Curr->hasOneUse())
1450 if (!GoThrough(Curr))
1451 if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1452 LeafConditions.push_back(ICmp);
1453 } while (!Worklist.empty());
1454
1455 // If the current basic block has the same exit count as the whole loop, and
1456 // it consists of multiple icmp's, try to collect all icmp's that give exact
1457 // same exit count. For all other icmp's, we could use one less iteration,
1458 // because their value on the last iteration doesn't really matter.
1459 SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1460 if (!SkipLastIter && LeafConditions.size() > 1 &&
1461 SE->getExitCount(L, ExitingBB,
1463 MaxIter)
1464 for (auto *ICmp : LeafConditions) {
1465 auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1466 /*ControlsExit*/ false);
1467 const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
1468 if (isa<SCEVCouldNotCompute>(ExitMax))
1469 continue;
1470 // They could be of different types (specifically this happens after
1471 // IV widening).
1472 auto *WiderType =
1473 SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1474 const SCEV *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1475 const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1476 if (WideExitMax == WideMaxIter)
1477 ICmpsFailingOnLastIter.insert(ICmp);
1478 }
1479
1480 bool Changed = false;
1481 for (auto *OldCond : LeafConditions) {
1482 // Skip last iteration for this icmp under one of two conditions:
1483 // - We do it for all conditions;
1484 // - There is another ICmp that would fail on last iter, so this one doesn't
1485 // really matter.
1486 bool OptimisticSkipLastIter = SkipLastIter;
1487 if (!OptimisticSkipLastIter) {
1488 if (ICmpsFailingOnLastIter.size() > 1)
1489 OptimisticSkipLastIter = true;
1490 else if (ICmpsFailingOnLastIter.size() == 1)
1491 OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1492 }
1493 if (auto Replaced =
1494 createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1495 OptimisticSkipLastIter, SE, Rewriter)) {
1496 Changed = true;
1497 auto *NewCond = *Replaced;
1498 if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1499 NCI->setName(OldCond->getName() + ".first_iter");
1500 }
1501 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1502 << " with " << *NewCond << "\n");
1503 assert(OldCond->hasOneUse() && "Must be!");
1504 OldCond->replaceAllUsesWith(NewCond);
1505 DeadInsts.push_back(OldCond);
1506 // Make sure we no longer consider this condition as failing on last
1507 // iteration.
1508 ICmpsFailingOnLastIter.erase(OldCond);
1509 }
1510 }
1511 return Changed;
1512}
1513
1514bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1515 // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1516 // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1517 // never reaches the icmp since the zext doesn't fold to an AddRec unless
1518 // it already has flags. The alternative to this would be to extending the
1519 // set of "interesting" IV users to include the icmp, but doing that
1520 // regresses results in practice by querying SCEVs before trip counts which
1521 // rely on them which results in SCEV caching sub-optimal answers. The
1522 // concern about caching sub-optimal results is why we only query SCEVs of
1523 // the loop invariant RHS here.
1524 SmallVector<BasicBlock*, 16> ExitingBlocks;
1525 L->getExitingBlocks(ExitingBlocks);
1526 bool Changed = false;
1527 for (auto *ExitingBB : ExitingBlocks) {
1528 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1529 if (!BI)
1530 continue;
1531 assert(BI->isConditional() && "exit branch must be conditional");
1532
1533 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1534 if (!ICmp || !ICmp->hasOneUse())
1535 continue;
1536
1537 auto *LHS = ICmp->getOperand(0);
1538 auto *RHS = ICmp->getOperand(1);
1539 // For the range reasoning, avoid computing SCEVs in the loop to avoid
1540 // poisoning cache with sub-optimal results. For the must-execute case,
1541 // this is a neccessary precondition for correctness.
1542 if (!L->isLoopInvariant(RHS)) {
1543 if (!L->isLoopInvariant(LHS))
1544 continue;
1545 // Same logic applies for the inverse case
1546 std::swap(LHS, RHS);
1547 }
1548
1549 // Match (icmp signed-cond zext, RHS)
1550 Value *LHSOp = nullptr;
1551 if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1552 continue;
1553
1554 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1555 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1556 auto FullCR = ConstantRange::getFull(InnerBitWidth);
1557 FullCR = FullCR.zeroExtend(OuterBitWidth);
1558 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1559 if (FullCR.contains(RHSCR)) {
1560 // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1561 // replace the signed condition with the unsigned version.
1562 ICmp->setPredicate(ICmp->getUnsignedPredicate());
1563 Changed = true;
1564 // Note: No SCEV invalidation needed. We've changed the predicate, but
1565 // have not changed exit counts, or the values produced by the compare.
1566 continue;
1567 }
1568 }
1569
1570 // Now that we've canonicalized the condition to match the extend,
1571 // see if we can rotate the extend out of the loop.
1572 for (auto *ExitingBB : ExitingBlocks) {
1573 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1574 if (!BI)
1575 continue;
1576 assert(BI->isConditional() && "exit branch must be conditional");
1577
1578 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1579 if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1580 continue;
1581
1582 bool Swapped = false;
1583 auto *LHS = ICmp->getOperand(0);
1584 auto *RHS = ICmp->getOperand(1);
1585 if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1586 // Nothing to rotate
1587 continue;
1588 if (L->isLoopInvariant(LHS)) {
1589 // Same logic applies for the inverse case until we actually pick
1590 // which operand of the compare to update.
1591 Swapped = true;
1592 std::swap(LHS, RHS);
1593 }
1594 assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1595
1596 // Match (icmp unsigned-cond zext, RHS)
1597 // TODO: Extend to handle corresponding sext/signed-cmp case
1598 // TODO: Extend to other invertible functions
1599 Value *LHSOp = nullptr;
1600 if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1601 continue;
1602
1603 // In general, we only rotate if we can do so without increasing the number
1604 // of instructions. The exception is when we have an zext(add-rec). The
1605 // reason for allowing this exception is that we know we need to get rid
1606 // of the zext for SCEV to be able to compute a trip count for said loops;
1607 // we consider the new trip count valuable enough to increase instruction
1608 // count by one.
1609 if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1610 continue;
1611
1612 // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1613 // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1614 // when zext is loop varying and RHS is loop invariant. This converts
1615 // loop varying work to loop-invariant work.
1616 auto doRotateTransform = [&]() {
1617 assert(ICmp->isUnsigned() && "must have proven unsigned already");
1618 auto *NewRHS = CastInst::Create(
1619 Instruction::Trunc, RHS, LHSOp->getType(), "",
1620 L->getLoopPreheader()->getTerminator()->getIterator());
1621 // NewRHS is an operation that has been hoisted out of the loop, and
1622 // therefore should have a dropped location.
1623 NewRHS->setDebugLoc(DebugLoc::getDropped());
1624 ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1625 ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1626 // Samesign flag cannot be preserved after narrowing the compare.
1627 ICmp->setSameSign(false);
1628 if (LHS->use_empty())
1629 DeadInsts.push_back(LHS);
1630 };
1631
1632 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1633 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1634 auto FullCR = ConstantRange::getFull(InnerBitWidth);
1635 FullCR = FullCR.zeroExtend(OuterBitWidth);
1636 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1637 if (FullCR.contains(RHSCR)) {
1638 doRotateTransform();
1639 Changed = true;
1640 // Note, we are leaving SCEV in an unfortunately imprecise case here
1641 // as rotation tends to reveal information about trip counts not
1642 // previously visible.
1643 continue;
1644 }
1645 }
1646
1647 return Changed;
1648}
1649
1650bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1651 SmallVector<BasicBlock*, 16> ExitingBlocks;
1652 L->getExitingBlocks(ExitingBlocks);
1653
1654 // Remove all exits which aren't both rewriteable and execute on every
1655 // iteration.
1656 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1657 // If our exitting block exits multiple loops, we can only rewrite the
1658 // innermost one. Otherwise, we're changing how many times the innermost
1659 // loop runs before it exits.
1660 if (LI->getLoopFor(ExitingBB) != L)
1661 return true;
1662
1663 // Can't rewrite non-branch yet.
1664 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1665 if (!BI)
1666 return true;
1667
1668 // Likewise, the loop latch must be dominated by the exiting BB.
1669 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1670 return true;
1671
1672 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1673 // If already constant, nothing to do. However, if this is an
1674 // unconditional exit, we can still replace header phis with their
1675 // preheader value.
1676 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1677 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1678 return true;
1679 }
1680
1681 return false;
1682 });
1683
1684 if (ExitingBlocks.empty())
1685 return false;
1686
1687 // Get a symbolic upper bound on the loop backedge taken count.
1688 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1689 if (isa<SCEVCouldNotCompute>(MaxBECount))
1690 return false;
1691
1692 // Visit our exit blocks in order of dominance. We know from the fact that
1693 // all exits must dominate the latch, so there is a total dominance order
1694 // between them.
1695 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1696 // std::sort sorts in ascending order, so we want the inverse of
1697 // the normal dominance relation.
1698 if (A == B) return false;
1699 if (DT->properlyDominates(A, B))
1700 return true;
1701 else {
1702 assert(DT->properlyDominates(B, A) &&
1703 "expected total dominance order!");
1704 return false;
1705 }
1706 });
1707#ifdef ASSERT
1708 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1709 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1710 }
1711#endif
1712
1713 bool Changed = false;
1714 bool SkipLastIter = false;
1715 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1716 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1717 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1718 return;
1719 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1720 CurrMaxExit = MaxExitCount;
1721 else
1722 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1723 // If the loop has more than 1 iteration, all further checks will be
1724 // executed 1 iteration less.
1725 if (CurrMaxExit == MaxBECount)
1726 SkipLastIter = true;
1727 };
1728 SmallPtrSet<const SCEV *, 8> DominatingExactExitCounts;
1729 for (BasicBlock *ExitingBB : ExitingBlocks) {
1730 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1731 const SCEV *MaxExitCount = SE->getExitCount(
1732 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1733 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1734 // Okay, we do not know the exit count here. Can we at least prove that it
1735 // will remain the same within iteration space?
1736 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1737 auto OptimizeCond = [&](bool SkipLastIter) {
1738 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1739 MaxBECount, SkipLastIter,
1740 SE, Rewriter, DeadInsts);
1741 };
1742
1743 // TODO: We might have proved that we can skip the last iteration for
1744 // this check. In this case, we only want to check the condition on the
1745 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1746 // corner case:
1747 //
1748 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1749 //
1750 // If we could not prove that len != 0, then we also could not prove that
1751 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1752 // OptimizeCond will likely not prove anything for it, even if it could
1753 // prove the same fact for len.
1754 //
1755 // As a temporary solution, we query both last and pre-last iterations in
1756 // hope that we will be able to prove triviality for at least one of
1757 // them. We can stop querying MaxBECount for this case once SCEV
1758 // understands that (MaxBECount - 1) will not overflow here.
1759 if (OptimizeCond(false))
1760 Changed = true;
1761 else if (SkipLastIter && OptimizeCond(true))
1762 Changed = true;
1763 UpdateSkipLastIter(MaxExitCount);
1764 continue;
1765 }
1766
1767 UpdateSkipLastIter(ExactExitCount);
1768
1769 // If we know we'd exit on the first iteration, rewrite the exit to
1770 // reflect this. This does not imply the loop must exit through this
1771 // exit; there may be an earlier one taken on the first iteration.
1772 // We know that the backedge can't be taken, so we replace all
1773 // the header PHIs with values coming from the preheader.
1774 if (ExactExitCount->isZero()) {
1775 foldExit(L, ExitingBB, true, DeadInsts);
1776 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1777 Changed = true;
1778 continue;
1779 }
1780
1781 assert(ExactExitCount->getType()->isIntegerTy() &&
1782 MaxBECount->getType()->isIntegerTy() &&
1783 "Exit counts must be integers");
1784
1785 Type *WiderType =
1786 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1787 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1788 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1789 assert(MaxBECount->getType() == ExactExitCount->getType());
1790
1791 // Can we prove that some other exit must be taken strictly before this
1792 // one?
1793 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1794 ExactExitCount)) {
1795 foldExit(L, ExitingBB, false, DeadInsts);
1796 Changed = true;
1797 continue;
1798 }
1799
1800 // As we run, keep track of which exit counts we've encountered. If we
1801 // find a duplicate, we've found an exit which would have exited on the
1802 // exiting iteration, but (from the visit order) strictly follows another
1803 // which does the same and is thus dead.
1804 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1805 foldExit(L, ExitingBB, false, DeadInsts);
1806 Changed = true;
1807 continue;
1808 }
1809
1810 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1811 // here. If we kept track of the min of dominanting exits so far, we could
1812 // discharge exits with EC >= MDEC. This is less powerful than the existing
1813 // transform (since later exits aren't considered), but potentially more
1814 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1815 // or a >u b. Such a case is not currently known.
1816 }
1817 return Changed;
1818}
1819
1820static bool crashingBBWithoutEffect(const BasicBlock &BB) {
1821 return llvm::all_of(BB, [](const Instruction &I) {
1822 // TODO: for now this is overly restrictive, to make sure nothing in this
1823 // BB can depend on the loop body.
1824 // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
1825 // load does not have a side effect, but we could have
1826 // %a = load ptr, ptr %ptr
1827 // %b = load i32, ptr %a
1828 // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
1829 // dereference by skipping over loop iterations.
1830 if (const auto *CB = dyn_cast<CallBase>(&I)) {
1831 if (CB->onlyAccessesInaccessibleMemory())
1832 return true;
1833 }
1834 return isa<UnreachableInst>(I);
1835 });
1836}
1837
1838bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1839 SmallVector<BasicBlock*, 16> ExitingBlocks;
1840 L->getExitingBlocks(ExitingBlocks);
1841
1842 // Finally, see if we can rewrite our exit conditions into a loop invariant
1843 // form. If we have a read-only loop, and we can tell that we must exit down
1844 // a path which does not need any of the values computed within the loop, we
1845 // can rewrite the loop to exit on the first iteration. Note that this
1846 // doesn't either a) tell us the loop exits on the first iteration (unless
1847 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1848 // This transformation looks a lot like a restricted form of dead loop
1849 // elimination, but restricted to read-only loops and without neccesssarily
1850 // needing to kill the loop entirely.
1851 if (!LoopPredication)
1852 return false;
1853
1854 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1855 // through *explicit* control flow. We have to eliminate the possibility of
1856 // implicit exits (see below) before we know it's truly exact.
1857 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1858 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1859 return false;
1860
1861 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1862 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1863
1864 auto BadExit = [&](BasicBlock *ExitingBB) {
1865 // If our exiting block exits multiple loops, we can only rewrite the
1866 // innermost one. Otherwise, we're changing how many times the innermost
1867 // loop runs before it exits.
1868 if (LI->getLoopFor(ExitingBB) != L)
1869 return true;
1870
1871 // Can't rewrite non-branch yet.
1872 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1873 if (!BI)
1874 return true;
1875
1876 // If already constant, nothing to do.
1877 if (isa<Constant>(BI->getCondition()))
1878 return true;
1879
1880 // If the exit block has phis, we need to be able to compute the values
1881 // within the loop which contains them. This assumes trivially lcssa phis
1882 // have already been removed; TODO: generalize
1883 BasicBlock *ExitBlock =
1884 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1885 if (!ExitBlock->phis().empty())
1886 return true;
1887
1888 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1889 if (isa<SCEVCouldNotCompute>(ExitCount) ||
1890 !Rewriter.isSafeToExpand(ExitCount))
1891 return true;
1892
1893 assert(SE->isLoopInvariant(ExitCount, L) &&
1894 "Exit count must be loop invariant");
1895 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1896 return false;
1897 };
1898
1899 // Make sure all exits dominate the latch. This means there is a linear chain
1900 // of exits. We check this before sorting so we have a total order.
1901 BasicBlock *Latch = L->getLoopLatch();
1902 for (BasicBlock *ExitingBB : ExitingBlocks)
1903 if (!DT->dominates(ExitingBB, Latch))
1904 return false;
1905
1906 // If we have any exits which can't be predicated themselves, than we can't
1907 // predicate any exit which isn't guaranteed to execute before it. Consider
1908 // two exits (a) and (b) which would both exit on the same iteration. If we
1909 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1910 // we could convert a loop from exiting through (a) to one exiting through
1911 // (b). Note that this problem exists only for exits with the same exit
1912 // count, and we could be more aggressive when exit counts are known inequal.
1913 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1914 // llvm::sort sorts in ascending order, so we want the inverse of
1915 // the normal dominance relation.
1916 if (A == B)
1917 return false;
1918 if (DT->properlyDominates(A, B))
1919 return true;
1920 if (DT->properlyDominates(B, A))
1921 return false;
1922 llvm_unreachable("Should have total dominance order");
1923 });
1924
1925 // Make sure our exit blocks are really a total order (i.e. a linear chain of
1926 // exits before the backedge).
1927 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1928 assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
1929 "Not sorted by dominance");
1930
1931 // Given our sorted total order, we know that exit[j] must be evaluated
1932 // after all exit[i] such j > i.
1933 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1934 if (BadExit(ExitingBlocks[i])) {
1935 ExitingBlocks.resize(i);
1936 break;
1937 }
1938
1939 if (ExitingBlocks.empty())
1940 return false;
1941
1942 // At this point, ExitingBlocks consists of only those blocks which are
1943 // predicatable. Given that, we know we have at least one exit we can
1944 // predicate if the loop is doesn't have side effects and doesn't have any
1945 // implicit exits (because then our exact BTC isn't actually exact).
1946 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1947 // suggestions on how to improve this? I can obviously bail out for outer
1948 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1949 // is that enough for *all* side effects?
1950 bool HasThreadLocalSideEffects = false;
1951 for (BasicBlock *BB : L->blocks())
1952 for (auto &I : *BB) {
1953 // TODO:isGuaranteedToTransfer
1954 if (I.mayHaveSideEffects()) {
1956 return false;
1957 HasThreadLocalSideEffects = true;
1958 if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
1959 // Simple stores cannot be observed by other threads.
1960 // If HasThreadLocalSideEffects is set, we check
1961 // crashingBBWithoutEffect to make sure that the crashing BB cannot
1962 // observe them either.
1963 if (!SI->isSimple())
1964 return false;
1965 } else {
1966 return false;
1967 }
1968 }
1969
1970 // Skip if the loop has tokens referenced outside the loop to avoid
1971 // changing convergence behavior.
1972 if (I.getType()->isTokenTy()) {
1973 for (User *U : I.users()) {
1974 Instruction *UserInst = dyn_cast<Instruction>(U);
1975 if (UserInst && !L->contains(UserInst)) {
1976 return false;
1977 }
1978 }
1979 }
1980 }
1981
1982 bool Changed = false;
1983 // Finally, do the actual predication for all predicatable blocks. A couple
1984 // of notes here:
1985 // 1) We don't bother to constant fold dominated exits with identical exit
1986 // counts; that's simply a form of CSE/equality propagation and we leave
1987 // it for dedicated passes.
1988 // 2) We insert the comparison at the branch. Hoisting introduces additional
1989 // legality constraints and we leave that to dedicated logic. We want to
1990 // predicate even if we can't insert a loop invariant expression as
1991 // peeling or unrolling will likely reduce the cost of the otherwise loop
1992 // varying check.
1993 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1994 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1995 Value *ExactBTCV = nullptr; // Lazily generated if needed.
1996 for (BasicBlock *ExitingBB : ExitingBlocks) {
1997 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1998
1999 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2000 if (HasThreadLocalSideEffects) {
2001 const BasicBlock *Unreachable = nullptr;
2002 for (const BasicBlock *Succ : BI->successors()) {
2003 if (isa<UnreachableInst>(Succ->getTerminator()))
2004 Unreachable = Succ;
2005 }
2006 // Exit BB which have one branch back into the loop and another one to
2007 // a trap can still be optimized, because local side effects cannot
2008 // be observed in the exit case (the trap). We could be smarter about
2009 // this, but for now lets pattern match common cases that directly trap.
2010 if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
2011 return Changed;
2012 }
2013 Value *NewCond;
2014 if (ExitCount == ExactBTC) {
2015 NewCond = L->contains(BI->getSuccessor(0)) ?
2016 B.getFalse() : B.getTrue();
2017 } else {
2018 Value *ECV = Rewriter.expandCodeFor(ExitCount);
2019 if (!ExactBTCV)
2020 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2021 Value *RHS = ExactBTCV;
2022 if (ECV->getType() != RHS->getType()) {
2023 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2024 ECV = B.CreateZExt(ECV, WiderTy);
2025 RHS = B.CreateZExt(RHS, WiderTy);
2026 }
2027 auto Pred = L->contains(BI->getSuccessor(0)) ?
2028 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2029 NewCond = B.CreateICmp(Pred, ECV, RHS);
2030 }
2031 Value *OldCond = BI->getCondition();
2032 BI->setCondition(NewCond);
2033 if (OldCond->use_empty())
2034 DeadInsts.emplace_back(OldCond);
2035 Changed = true;
2036 RunUnswitching = true;
2037 }
2038
2039 return Changed;
2040}
2041
2042//===----------------------------------------------------------------------===//
2043// IndVarSimplify driver. Manage several subpasses of IV simplification.
2044//===----------------------------------------------------------------------===//
2045
2046bool IndVarSimplify::run(Loop *L) {
2047 // We need (and expect!) the incoming loop to be in LCSSA.
2048 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2049 "LCSSA required to run indvars!");
2050
2051 // If LoopSimplify form is not available, stay out of trouble. Some notes:
2052 // - LSR currently only supports LoopSimplify-form loops. Indvars'
2053 // canonicalization can be a pessimization without LSR to "clean up"
2054 // afterwards.
2055 // - We depend on having a preheader; in particular,
2056 // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2057 // and we're in trouble if we can't find the induction variable even when
2058 // we've manually inserted one.
2059 // - LFTR relies on having a single backedge.
2060 if (!L->isLoopSimplifyForm())
2061 return false;
2062
2063 bool Changed = false;
2064 // If there are any floating-point recurrences, attempt to
2065 // transform them to use integer recurrences.
2066 Changed |= rewriteNonIntegerIVs(L);
2067
2068 // Create a rewriter object which we'll use to transform the code with.
2069 SCEVExpander Rewriter(*SE, DL, "indvars");
2070#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2071 Rewriter.setDebugType(DEBUG_TYPE);
2072#endif
2073
2074 // Eliminate redundant IV users.
2075 //
2076 // Simplification works best when run before other consumers of SCEV. We
2077 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2078 // other expressions involving loop IVs have been evaluated. This helps SCEV
2079 // set no-wrap flags before normalizing sign/zero extension.
2080 Rewriter.disableCanonicalMode();
2081 Changed |= simplifyAndExtend(L, Rewriter, LI);
2082
2083 // Check to see if we can compute the final value of any expressions
2084 // that are recurrent in the loop, and substitute the exit values from the
2085 // loop into any instructions outside of the loop that use the final values
2086 // of the current expressions.
2087 if (ReplaceExitValue != NeverRepl) {
2088 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
2089 ReplaceExitValue, DeadInsts)) {
2090 NumReplaced += Rewrites;
2091 Changed = true;
2092 }
2093 }
2094
2095 // Eliminate redundant IV cycles.
2096 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
2097
2098 // Try to convert exit conditions to unsigned and rotate computation
2099 // out of the loop. Note: Handles invalidation internally if needed.
2100 Changed |= canonicalizeExitCondition(L);
2101
2102 // Try to eliminate loop exits based on analyzeable exit counts
2103 if (optimizeLoopExits(L, Rewriter)) {
2104 Changed = true;
2105 // Given we've changed exit counts, notify SCEV
2106 // Some nested loops may share same folded exit basic block,
2107 // thus we need to notify top most loop.
2108 SE->forgetTopmostLoop(L);
2109 }
2110
2111 // Try to form loop invariant tests for loop exits by changing how many
2112 // iterations of the loop run when that is unobservable.
2113 if (predicateLoopExits(L, Rewriter)) {
2114 Changed = true;
2115 // Given we've changed exit counts, notify SCEV
2116 SE->forgetLoop(L);
2117 }
2118
2119 // If we have a trip count expression, rewrite the loop's exit condition
2120 // using it.
2121 if (!DisableLFTR) {
2122 BasicBlock *PreHeader = L->getLoopPreheader();
2123
2124 SmallVector<BasicBlock*, 16> ExitingBlocks;
2125 L->getExitingBlocks(ExitingBlocks);
2126 for (BasicBlock *ExitingBB : ExitingBlocks) {
2127 // Can't rewrite non-branch yet.
2128 if (!isa<BranchInst>(ExitingBB->getTerminator()))
2129 continue;
2130
2131 // If our exitting block exits multiple loops, we can only rewrite the
2132 // innermost one. Otherwise, we're changing how many times the innermost
2133 // loop runs before it exits.
2134 if (LI->getLoopFor(ExitingBB) != L)
2135 continue;
2136
2137 if (!needsLFTR(L, ExitingBB))
2138 continue;
2139
2140 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2141 if (isa<SCEVCouldNotCompute>(ExitCount))
2142 continue;
2143
2144 // This was handled above, but as we form SCEVs, we can sometimes refine
2145 // existing ones; this allows exit counts to be folded to zero which
2146 // weren't when optimizeLoopExits saw them. Arguably, we should iterate
2147 // until stable to handle cases like this better.
2148 if (ExitCount->isZero())
2149 continue;
2150
2151 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2152 if (!IndVar)
2153 continue;
2154
2155 // Avoid high cost expansions. Note: This heuristic is questionable in
2156 // that our definition of "high cost" is not exactly principled.
2157 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2158 TTI, PreHeader->getTerminator()))
2159 continue;
2160
2161 if (!Rewriter.isSafeToExpand(ExitCount))
2162 continue;
2163
2164 Changed |= linearFunctionTestReplace(L, ExitingBB,
2165 ExitCount, IndVar,
2166 Rewriter);
2167 }
2168 }
2169 // Clear the rewriter cache, because values that are in the rewriter's cache
2170 // can be deleted in the loop below, causing the AssertingVH in the cache to
2171 // trigger.
2172 Rewriter.clear();
2173
2174 // Now that we're done iterating through lists, clean up any instructions
2175 // which are now dead.
2176 while (!DeadInsts.empty()) {
2177 Value *V = DeadInsts.pop_back_val();
2178
2179 if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2180 Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2181 else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2182 Changed |=
2183 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2184 }
2185
2186 // The Rewriter may not be used from this point on.
2187
2188 // Loop-invariant instructions in the preheader that aren't used in the
2189 // loop may be sunk below the loop to reduce register pressure.
2190 Changed |= sinkUnusedInvariants(L);
2191
2192 // rewriteFirstIterationLoopExitValues does not rely on the computation of
2193 // trip count and therefore can further simplify exit values in addition to
2194 // rewriteLoopExitValues.
2195 Changed |= rewriteFirstIterationLoopExitValues(L);
2196
2197 // Clean up dead instructions.
2198 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2199
2200 // Check a post-condition.
2201 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2202 "Indvars did not preserve LCSSA!");
2203 if (VerifyMemorySSA && MSSAU)
2204 MSSAU->getMemorySSA()->verifyMemorySSA();
2205
2206 return Changed;
2207}
2208
2211 LPMUpdater &) {
2212 Function *F = L.getHeader()->getParent();
2213 const DataLayout &DL = F->getDataLayout();
2214
2215 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2216 WidenIndVars && AllowIVWidening);
2217 if (!IVS.run(&L))
2218 return PreservedAnalyses::all();
2219
2220 auto PA = getLoopPassPreservedAnalyses();
2221 PA.preserveSet<CFGAnalyses>();
2222 if (IVS.runUnswitching()) {
2224 PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2225 }
2226
2227 if (AR.MSSA)
2228 PA.preserve<MemorySSAAnalysis>();
2229 return PA;
2230}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define DEBUG_TYPE
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
static std::optional< FloatingPointIV > maybeFloatingPointRecurrence(Loop *L, PHINode *PN)
Analyze a PN to determine whether it represents a simple floating-point induction variable,...
static void replaceExitCond(BranchInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
static bool isRepresentableAsExactInteger(const APFloat &FPVal, int64_t IntVal)
Ensure we stay within the bounds of fp values that can be represented as integers without gaps,...
static void replaceLoopPHINodesWithPreheaderValues(LoopInfo *LI, Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts, ScalarEvolution &SE)
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static Value * createInvariantCond(const Loop *L, BasicBlock *ExitingBB, const ScalarEvolution::LoopInvariantPredicate &LIP, SCEVExpander &Rewriter)
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
static std::optional< Value * > createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter)
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
static Constant * createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, bool IsTaken)
static std::optional< IntegerIV > tryConvertToIntegerIV(const FloatingPointIV &FPIV)
Ensure that the floating-point IV can be converted to a semantics-preserving signed 32-bit integer IV...
static cl::opt< bool > LoopPredicationTraps("indvars-predicate-loop-traps", cl::Hidden, cl::init(true), cl::desc("Predicate conditions that trap in loops with only local writes"))
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
static void canonicalizeToIntegerIV(Loop *L, PHINode *PN, const FloatingPointIV &FPIV, const IntegerIV &IIV, const TargetLibraryInfo *TLI, std::unique_ptr< MemorySSAUpdater > &MSSAU)
Rewrite the floating-point IV as an integer IV.
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR.
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
static bool crashingBBWithoutEffect(const BasicBlock &BB)
static CmpInst::Predicate getIntegerPredicate(CmpInst::Predicate FPPred)
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:290
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:331
const fltSemantics & getSemantics() const
Definition APFloat.h:1439
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:610
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI StringRef getPredicateName(Predicate P)
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition DataLayout.h:229
static DebugLoc getDropped()
Definition DebugLoc.h:164
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction compares its operands according to the predicate given to the constructor.
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
CmpPredicate getInverseCmpPredicate() const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition LoopInfo.h:441
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:936
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This node represents a polynomial recurrence on the trip count of the specified loop.
LLVM_ABI const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
LLVM_ABI const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
LLVM_ABI bool isOne() const
Return true if the expression is a constant one.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
LLVM_ABI std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const
LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
LLVM_ABI const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates=false)
Compute the number of times the backedge of the specified loop will execute if its exit condition wer...
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI std::optional< bool > evaluatePredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Check whether the condition described by Pred, LHS, and RHS is true or false in the given Context.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
LLVM_ABI const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
@ SymbolicMaximum
An expression which provides an upper bound on the exact trip count.
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Value handle that is nullable, but tries to track the Value.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
cst_pred_ty< is_one > m_scev_One()
Match an integer 1.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
std::pair< bool, bool > simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
DWARFExpression::Operation Op
constexpr U AbsoluteValue(T X)
Return the absolute value of a signed integer, converted to the corresponding unsigned integer type.
Definition MathExtras.h:592
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1867
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
LLVM_ABI bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition Local.cpp:641
@ UnusedIndVarInLoop
Definition LoopUtils.h:558
@ OnlyCheapRepl
Definition LoopUtils.h:556
@ NeverRepl
Definition LoopUtils.h:555
@ NoHardUse
Definition LoopUtils.h:557
@ AlwaysRepl
Definition LoopUtils.h:559
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
Represents a floating-point induction variable pattern that may be convertible to integer form.
FloatingPointIV(APFloat Init, APFloat Incr, APFloat Exit, FCmpInst *Compare, BinaryOperator *Add)
BinaryOperator * Add
Represents the integer values for a converted IV.
int64_t InitValue
int64_t ExitValue
int64_t IncrValue
CmpInst::Predicate NewPred
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A marker analysis to determine if SimpleLoopUnswitch should run again on a given loop.
Collect information about induction variables that are used by sign/zero extend operations.