LLVM 23.0.0git
IndVarSimplify.cpp
Go to the documentation of this file.
1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This transformation analyzes and transforms the induction variables (and
10// computations derived from them) into simpler forms suitable for subsequent
11// analysis and transformation.
12//
13// If the trip count of a loop is computable, this pass also makes the following
14// changes:
15// 1. The exit condition for the loop is canonicalized to compare the
16// induction value against the exit value. This turns loops like:
17// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18// 2. Any use outside of the loop of an expression derived from the indvar
19// is changed to compute the derived value outside of the loop, eliminating
20// the dependence on the exit value of the induction variable. If the only
21// purpose of the loop is to compute the exit value of some derived
22// expression, this transformation will make the loop dead.
23//
24//===----------------------------------------------------------------------===//
25
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constant.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/Dominators.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/PassManager.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/Use.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/IR/ValueHandle.h"
67#include "llvm/Support/Debug.h"
76#include <cassert>
77#include <cstdint>
78#include <utility>
79
80using namespace llvm;
81using namespace PatternMatch;
82using namespace SCEVPatternMatch;
83
84#define DEBUG_TYPE "indvars"
85
86STATISTIC(NumWidened , "Number of indvars widened");
87STATISTIC(NumReplaced , "Number of exit values replaced");
88STATISTIC(NumLFTR , "Number of loop exit tests replaced");
89STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
90STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
91
93 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
94 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
96 clEnumValN(NeverRepl, "never", "never replace exit value"),
98 "only replace exit value when the cost is cheap"),
100 UnusedIndVarInLoop, "unusedindvarinloop",
101 "only replace exit value when it is an unused "
102 "induction variable in the loop and has cheap replacement cost"),
103 clEnumValN(NoHardUse, "noharduse",
104 "only replace exit values when loop def likely dead"),
105 clEnumValN(AlwaysRepl, "always",
106 "always replace exit value whenever possible")));
107
109 "indvars-post-increment-ranges", cl::Hidden,
110 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
111 cl::init(true));
112
113static cl::opt<bool>
114DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
115 cl::desc("Disable Linear Function Test Replace optimization"));
116
117static cl::opt<bool>
118LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
119 cl::desc("Predicate conditions in read only loops"));
120
122 "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
123 cl::desc("Predicate conditions that trap in loops with only local writes"));
124
125static cl::opt<bool>
126AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
127 cl::desc("Allow widening of indvars to eliminate s/zext"));
128
129namespace {
130
131class IndVarSimplify {
132 LoopInfo *LI;
133 ScalarEvolution *SE;
134 DominatorTree *DT;
135 const DataLayout &DL;
138 std::unique_ptr<MemorySSAUpdater> MSSAU;
139
141 bool WidenIndVars;
142
143 bool RunUnswitching = false;
144
145 bool handleFloatingPointIV(Loop *L, PHINode *PH);
146 bool rewriteNonIntegerIVs(Loop *L);
147
148 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
149 /// Try to improve our exit conditions by converting condition from signed
150 /// to unsigned or rotating computation out of the loop.
151 /// (See inline comment about why this is duplicated from simplifyAndExtend)
152 bool canonicalizeExitCondition(Loop *L);
153 /// Try to eliminate loop exits based on analyzeable exit counts
154 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
155 /// Try to form loop invariant tests for loop exits by changing how many
156 /// iterations of the loop run when that is unobservable.
157 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
158
159 bool rewriteFirstIterationLoopExitValues(Loop *L);
160
161 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
162 const SCEV *ExitCount,
163 PHINode *IndVar, SCEVExpander &Rewriter);
164
165 bool sinkUnusedInvariants(Loop *L);
166
167public:
168 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
169 const DataLayout &DL, TargetLibraryInfo *TLI,
170 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
171 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
172 WidenIndVars(WidenIndVars) {
173 if (MSSA)
174 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
175 }
176
177 bool run(Loop *L);
178
179 bool runUnswitching() const { return RunUnswitching; }
180};
181
182} // end anonymous namespace
183
184//===----------------------------------------------------------------------===//
185// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
186//===----------------------------------------------------------------------===//
187
188/// Convert APF to an integer, if possible.
189static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
190 bool isExact = false;
191 // See if we can convert this to an int64_t
192 uint64_t UIntVal;
193 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
194 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
195 !isExact)
196 return false;
197 IntVal = UIntVal;
198 return true;
199}
200
201/// Ensure we stay within the bounds of fp values that can be represented as
202/// integers without gaps, which are 2^24 and 2^53 for IEEE-754 single and
203/// double precision respectively (both on negative and positive side).
204static bool isRepresentableAsExactInteger(const APFloat &FPVal,
205 int64_t IntVal) {
206 const auto &FltSema = FPVal.getSemantics();
207 if (!APFloat::isIEEELikeFP(FltSema))
208 return false;
209 return isUIntN(APFloat::semanticsPrecision(FltSema), AbsoluteValue(IntVal));
210}
211
212/// Represents a floating-point induction variable pattern that may be
213/// convertible to integer form.
226
227/// Represents the integer values for a converted IV.
234
236 switch (FPPred) {
239 return CmpInst::ICMP_EQ;
242 return CmpInst::ICMP_NE;
245 return CmpInst::ICMP_SGT;
248 return CmpInst::ICMP_SGE;
251 return CmpInst::ICMP_SLT;
254 return CmpInst::ICMP_SLE;
255 default:
257 }
258}
259
260/// Analyze a PN to determine whether it represents a simple floating-point
261/// induction variable, with constant fp init, increment, and exit values.
262///
263/// Returns a FloatingPointIV struct if matched, std::nullopt otherwise.
264static std::optional<FloatingPointIV>
266 // Identify incoming and backedge for the PN.
267 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
268 unsigned BackEdge = IncomingEdge ^ 1;
269
270 // Check incoming value.
271 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
272 if (!InitValueVal)
273 return std::nullopt;
274
275 // Check IV increment. Reject this PN if increment operation is not
276 // an add or increment value can not be represented by an integer.
277 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
278 if (!Incr || Incr->getOpcode() != Instruction::FAdd)
279 return std::nullopt;
280
281 // If this is not an add of the PHI with a constantfp, or if the constant fp
282 // is not an integer, bail out.
283 auto *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
284 if (!IncValueVal || Incr->getOperand(0) != PN)
285 return std::nullopt;
286
287 // Check Incr uses. One user is PN and the other user is an exit condition
288 // used by the conditional terminator.
289 // TODO: Should relax this, so as to allow any `fpext` that may occur.
290 if (!Incr->hasNUses(2))
291 return std::nullopt;
292
293 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
294 // only used by a branch, we can't transform it.
295 auto It = llvm::find_if(Incr->users(),
296 [](const User *U) { return isa<FCmpInst>(U); });
297 if (It == Incr->users().end())
298 return std::nullopt;
299
300 FCmpInst *Compare = cast<FCmpInst>(*It);
301 if (!Compare->hasOneUse())
302 return std::nullopt;
303
304 // We need to verify that the branch actually controls the iteration count
305 // of the loop. If not, the new IV can overflow and no one will notice.
306 // The branch block must be in the loop and one of the successors must be out
307 // of the loop.
308 auto *BI = dyn_cast<CondBrInst>(Compare->user_back());
309 if (!BI)
310 return std::nullopt;
311
312 if (!L->contains(BI->getParent()) ||
313 (L->contains(BI->getSuccessor(0)) && L->contains(BI->getSuccessor(1))))
314 return std::nullopt;
315
316 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
317 // transform it.
318 auto *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
319 if (!ExitValueVal)
320 return std::nullopt;
321
322 return FloatingPointIV(InitValueVal->getValueAPF(),
323 IncValueVal->getValueAPF(),
324 ExitValueVal->getValueAPF(), Compare, Incr);
325}
326
327/// Ensure that the floating-point IV can be converted to a semantics-preserving
328/// signed 32-bit integer IV.
329///
330/// Returns a IntegerIV struct if possible, std::nullopt otherwise.
331static std::optional<IntegerIV>
333 // Convert floating-point predicate to integer.
334 auto NewPred = getIntegerPredicate(FPIV.Compare->getPredicate());
335 if (NewPred == CmpInst::BAD_ICMP_PREDICATE)
336 return std::nullopt;
337
338 // Convert APFloat values to signed integers.
339 int64_t InitValue, IncrValue, ExitValue;
340 if (!ConvertToSInt(FPIV.InitValue, InitValue) ||
341 !ConvertToSInt(FPIV.IncrValue, IncrValue) ||
342 !ConvertToSInt(FPIV.ExitValue, ExitValue))
343 return std::nullopt;
344
345 // Bail out if integers cannot be represented exactly.
346 if (!isRepresentableAsExactInteger(FPIV.InitValue, InitValue) ||
348 return std::nullopt;
349
350 // We convert the floating point induction variable to a signed i32 value if
351 // we can. This is only safe if the comparison will not overflow in a way that
352 // won't be trapped by the integer equivalent operations. Check for this now.
353 // TODO: We could use i64 if it is native and the range requires it.
354
355 // The start/stride/exit values must all fit in signed i32.
356 if (!isInt<32>(InitValue) || !isInt<32>(IncrValue) || !isInt<32>(ExitValue))
357 return std::nullopt;
358
359 // If not actually striding (add x, 0.0), avoid touching the code.
360 if (IncrValue == 0)
361 return std::nullopt;
362
363 // Positive and negative strides have different safety conditions.
364 if (IncrValue > 0) {
365 // If we have a positive stride, we require the init to be less than the
366 // exit value.
367 if (InitValue >= ExitValue)
368 return std::nullopt;
369
370 uint32_t Range = uint32_t(ExitValue - InitValue);
371 // Check for infinite loop, either:
372 // while (i <= Exit) or until (i > Exit)
373 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
374 if (++Range == 0)
375 return std::nullopt; // Range overflows.
376 }
377
378 unsigned Leftover = Range % uint32_t(IncrValue);
379
380 // If this is an equality comparison, we require that the strided value
381 // exactly land on the exit value, otherwise the IV condition will wrap
382 // around and do things the fp IV wouldn't.
383 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
384 Leftover != 0)
385 return std::nullopt;
386
387 // If the stride would wrap around the i32 before exiting, we can't
388 // transform the IV.
389 if (Leftover != 0 && int32_t(ExitValue + IncrValue) < ExitValue)
390 return std::nullopt;
391 } else {
392 // If we have a negative stride, we require the init to be greater than the
393 // exit value.
394 if (InitValue <= ExitValue)
395 return std::nullopt;
396
397 uint32_t Range = uint32_t(InitValue - ExitValue);
398 // Check for infinite loop, either:
399 // while (i >= Exit) or until (i < Exit)
400 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
401 if (++Range == 0)
402 return std::nullopt; // Range overflows.
403 }
404
405 unsigned Leftover = Range % uint32_t(-IncrValue);
406
407 // If this is an equality comparison, we require that the strided value
408 // exactly land on the exit value, otherwise the IV condition will wrap
409 // around and do things the fp IV wouldn't.
410 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
411 Leftover != 0)
412 return std::nullopt;
413
414 // If the stride would wrap around the i32 before exiting, we can't
415 // transform the IV.
416 if (Leftover != 0 && int32_t(ExitValue + IncrValue) > ExitValue)
417 return std::nullopt;
418 }
419
420 return IntegerIV{InitValue, IncrValue, ExitValue, NewPred};
421}
422
423/// Rewrite the floating-point IV as an integer IV.
425 const FloatingPointIV &FPIV,
426 const IntegerIV &IIV,
427 const TargetLibraryInfo *TLI,
428 std::unique_ptr<MemorySSAUpdater> &MSSAU) {
429 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
430 unsigned BackEdge = IncomingEdge ^ 1;
431
433 auto *Incr = cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
434 auto *BI = cast<CondBrInst>(FPIV.Compare->user_back());
435
436 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting floating-point IV to integer IV:\n"
437 << " Init: " << IIV.InitValue << "\n"
438 << " Incr: " << IIV.IncrValue << "\n"
439 << " Exit: " << IIV.ExitValue << "\n"
440 << " Pred: " << CmpInst::getPredicateName(IIV.NewPred)
441 << "\n"
442 << " Original PN: " << *PN << "\n");
443
444 // Insert new integer induction variable.
445 PHINode *NewPHI =
446 PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
448 PN->getIncomingBlock(IncomingEdge));
449 NewPHI->setDebugLoc(PN->getDebugLoc());
450
451 Instruction *NewAdd = BinaryOperator::CreateAdd(
453 Incr->getName() + ".int", Incr->getIterator());
454 NewAdd->setDebugLoc(Incr->getDebugLoc());
455 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
456
457 ICmpInst *NewCompare = new ICmpInst(
458 BI->getIterator(), IIV.NewPred, NewAdd,
460 NewCompare->setDebugLoc(FPIV.Compare->getDebugLoc());
461
462 // In the following deletions, PN may become dead and may be deleted.
463 // Use a WeakTrackingVH to observe whether this happens.
464 WeakTrackingVH WeakPH = PN;
465
466 // Delete the old floating point exit comparison. The branch starts using the
467 // new comparison.
468 NewCompare->takeName(FPIV.Compare);
469 FPIV.Compare->replaceAllUsesWith(NewCompare);
471
472 // Delete the old floating point increment.
473 Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
474 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
475
476 // If the FP induction variable still has uses, this is because something else
477 // in the loop uses its value. In order to canonicalize the induction
478 // variable, we chose to eliminate the IV and rewrite it in terms of an
479 // int->fp cast.
480 //
481 // We give preference to sitofp over uitofp because it is faster on most
482 // platforms.
483 if (WeakPH) {
484 Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
485 PN->getParent()->getFirstInsertionPt());
486 Conv->setDebugLoc(PN->getDebugLoc());
487 PN->replaceAllUsesWith(Conv);
488 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
489 }
490}
491
492/// If the loop has a floating induction variable, then insert corresponding
493/// integer induction variable if possible. For example, the following:
494/// for(double i = 0; i < 10000; ++i)
495/// bar(i)
496/// is converted into
497/// for(int i = 0; i < 10000; ++i)
498/// bar((double)i);
499bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
500 // See if the PN matches a floating-point IV pattern.
501 auto FPIV = maybeFloatingPointRecurrence(L, PN);
502 if (!FPIV)
503 return false;
504
505 // Can we safely convert the floating-point values to integer ones?
506 auto IIV = tryConvertToIntegerIV(*FPIV);
507 if (!IIV)
508 return false;
509
510 // Perform the rewriting.
511 canonicalizeToIntegerIV(L, PN, *FPIV, *IIV, TLI, MSSAU);
512 return true;
513}
514
515bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
516 // First step. Check to see if there are any floating-point recurrences.
517 // If there are, change them into integer recurrences, permitting analysis by
518 // the SCEV routines.
519 BasicBlock *Header = L->getHeader();
520
522
523 bool Changed = false;
524 for (WeakTrackingVH &PHI : PHIs)
525 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
526 Changed |= handleFloatingPointIV(L, PN);
527
528 // If the loop previously had floating-point IV, ScalarEvolution
529 // may not have been able to compute a trip count. Now that we've done some
530 // re-writing, the trip count may be computable.
531 if (Changed)
532 SE->forgetLoop(L);
533 return Changed;
534}
535
536//===---------------------------------------------------------------------===//
537// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
538// they will exit at the first iteration.
539//===---------------------------------------------------------------------===//
540
541/// Check to see if this loop has loop invariant conditions which lead to loop
542/// exits. If so, we know that if the exit path is taken, it is at the first
543/// loop iteration. This lets us predict exit values of PHI nodes that live in
544/// loop header.
545bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
546 // Verify the input to the pass is already in LCSSA form.
547 assert(L->isLCSSAForm(*DT));
548
549 SmallVector<BasicBlock *, 8> ExitBlocks;
550 L->getUniqueExitBlocks(ExitBlocks);
551
552 bool MadeAnyChanges = false;
553 for (auto *ExitBB : ExitBlocks) {
554 // If there are no more PHI nodes in this exit block, then no more
555 // values defined inside the loop are used on this path.
556 for (PHINode &PN : ExitBB->phis()) {
557 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
558 IncomingValIdx != E; ++IncomingValIdx) {
559 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
560
561 // Can we prove that the exit must run on the first iteration if it
562 // runs at all? (i.e. early exits are fine for our purposes, but
563 // traces which lead to this exit being taken on the 2nd iteration
564 // aren't.) Note that this is about whether the exit branch is
565 // executed, not about whether it is taken.
566 if (!L->getLoopLatch() ||
567 !DT->dominates(IncomingBB, L->getLoopLatch()))
568 continue;
569
570 // Get condition that leads to the exit path.
571 auto *TermInst = IncomingBB->getTerminator();
572
573 Value *Cond = nullptr;
574 if (auto *BI = dyn_cast<CondBrInst>(TermInst)) {
575 // Must be a conditional branch, otherwise the block
576 // should not be in the loop.
577 Cond = BI->getCondition();
578 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
579 Cond = SI->getCondition();
580 else
581 continue;
582
583 if (!L->isLoopInvariant(Cond))
584 continue;
585
586 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
587
588 // Only deal with PHIs in the loop header.
589 if (!ExitVal || ExitVal->getParent() != L->getHeader())
590 continue;
591
592 // If ExitVal is a PHI on the loop header, then we know its
593 // value along this exit because the exit can only be taken
594 // on the first iteration.
595 auto *LoopPreheader = L->getLoopPreheader();
596 assert(LoopPreheader && "Invalid loop");
597 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
598 if (PreheaderIdx != -1) {
599 assert(ExitVal->getParent() == L->getHeader() &&
600 "ExitVal must be in loop header");
601 MadeAnyChanges = true;
602 PN.setIncomingValue(IncomingValIdx,
603 ExitVal->getIncomingValue(PreheaderIdx));
604 SE->forgetValue(&PN);
605 }
606 }
607 }
608 }
609 return MadeAnyChanges;
610}
611
612//===----------------------------------------------------------------------===//
613// IV Widening - Extend the width of an IV to cover its widest uses.
614//===----------------------------------------------------------------------===//
615
616/// Update information about the induction variable that is extended by this
617/// sign or zero extend operation. This is used to determine the final width of
618/// the IV before actually widening it.
619static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
620 ScalarEvolution *SE,
621 const TargetTransformInfo *TTI) {
622 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
623 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
624 return;
625
626 Type *Ty = Cast->getType();
627 uint64_t Width = SE->getTypeSizeInBits(Ty);
628 if (!Cast->getDataLayout().isLegalInteger(Width))
629 return;
630
631 // Check that `Cast` actually extends the induction variable (we rely on this
632 // later). This takes care of cases where `Cast` is extending a truncation of
633 // the narrow induction variable, and thus can end up being narrower than the
634 // "narrow" induction variable.
635 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
636 if (NarrowIVWidth >= Width)
637 return;
638
639 // Cast is either an sext or zext up to this point.
640 // We should not widen an indvar if arithmetics on the wider indvar are more
641 // expensive than those on the narrower indvar. We check only the cost of ADD
642 // because at least an ADD is required to increment the induction variable. We
643 // could compute more comprehensively the cost of all instructions on the
644 // induction variable when necessary.
645 if (TTI &&
646 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
647 TTI->getArithmeticInstrCost(Instruction::Add,
648 Cast->getOperand(0)->getType())) {
649 return;
650 }
651
652 if (!WI.WidestNativeType ||
653 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
655 WI.IsSigned = IsSigned;
656 return;
657 }
658
659 // We extend the IV to satisfy the sign of its user(s), or 'signed'
660 // if there are multiple users with both sign- and zero extensions,
661 // in order not to introduce nondeterministic behaviour based on the
662 // unspecified order of a PHI nodes' users-iterator.
663 WI.IsSigned |= IsSigned;
664}
665
666//===----------------------------------------------------------------------===//
667// Live IV Reduction - Minimize IVs live across the loop.
668//===----------------------------------------------------------------------===//
669
670//===----------------------------------------------------------------------===//
671// Simplification of IV users based on SCEV evaluation.
672//===----------------------------------------------------------------------===//
673
674namespace {
675
676class IndVarSimplifyVisitor : public IVVisitor {
677 ScalarEvolution *SE;
678 const TargetTransformInfo *TTI;
679 PHINode *IVPhi;
680
681public:
682 WideIVInfo WI;
683
684 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
685 const TargetTransformInfo *TTI,
686 const DominatorTree *DTree)
687 : SE(SCEV), TTI(TTI), IVPhi(IV) {
688 DT = DTree;
689 WI.NarrowIV = IVPhi;
690 }
691
692 // Implement the interface used by simplifyUsersOfIV.
693 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
694};
695
696} // end anonymous namespace
697
698/// Iteratively perform simplification on a worklist of IV users. Each
699/// successive simplification may push more users which may themselves be
700/// candidates for simplification.
701///
702/// Sign/Zero extend elimination is interleaved with IV simplification.
703bool IndVarSimplify::simplifyAndExtend(Loop *L,
704 SCEVExpander &Rewriter,
705 LoopInfo *LI) {
707
708 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
709 L->getBlocks()[0]->getModule(), Intrinsic::experimental_guard);
710 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
711
713 llvm::make_pointer_range(L->getHeader()->phis()));
714
715 // Each round of simplification iterates through the SimplifyIVUsers worklist
716 // for all current phis, then determines whether any IVs can be
717 // widened. Widening adds new phis to LoopPhis, inducing another round of
718 // simplification on the wide IVs.
719 bool Changed = false;
720 while (!LoopPhis.empty()) {
721 // Evaluate as many IV expressions as possible before widening any IVs. This
722 // forces SCEV to set no-wrap flags before evaluating sign/zero
723 // extension. The first time SCEV attempts to normalize sign/zero extension,
724 // the result becomes final. So for the most predictable results, we delay
725 // evaluation of sign/zero extend evaluation until needed, and avoid running
726 // other SCEV based analysis prior to simplifyAndExtend.
727 do {
728 PHINode *CurrIV = LoopPhis.pop_back_val();
729
730 // Information about sign/zero extensions of CurrIV.
731 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
732
733 const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
734 Rewriter, &Visitor);
735
736 Changed |= C;
737 RunUnswitching |= U;
738 if (Visitor.WI.WidestNativeType) {
739 WideIVs.push_back(Visitor.WI);
740 }
741 } while(!LoopPhis.empty());
742
743 // Continue if we disallowed widening.
744 if (!WidenIndVars)
745 continue;
746
747 for (; !WideIVs.empty(); WideIVs.pop_back()) {
748 unsigned ElimExt;
749 unsigned Widened;
750 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
751 DT, DeadInsts, ElimExt, Widened,
752 HasGuards, UsePostIncrementRanges)) {
753 NumElimExt += ElimExt;
754 NumWidened += Widened;
755 Changed = true;
756 LoopPhis.push_back(WidePhi);
757 }
758 }
759 }
760 return Changed;
761}
762
763//===----------------------------------------------------------------------===//
764// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
765//===----------------------------------------------------------------------===//
766
/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
772 if (!IncI)
773 return nullptr;
774
775 switch (IncI->getOpcode()) {
776 case Instruction::Add:
777 case Instruction::Sub:
778 break;
779 case Instruction::GetElementPtr:
780 // An IV counter must preserve its type.
781 if (IncI->getNumOperands() == 2)
782 break;
783 [[fallthrough]];
784 default:
785 return nullptr;
786 }
787
788 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
789 if (Phi && Phi->getParent() == L->getHeader()) {
790 if (L->isLoopInvariant(IncI->getOperand(1)))
791 return Phi;
792 return nullptr;
793 }
794 if (IncI->getOpcode() == Instruction::GetElementPtr)
795 return nullptr;
796
797 // Allow add/sub to be commuted.
798 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
799 if (Phi && Phi->getParent() == L->getHeader()) {
800 if (L->isLoopInvariant(IncI->getOperand(0)))
801 return Phi;
802 }
803 return nullptr;
804}
805
806/// Whether the current loop exit test is based on this value. Currently this
807/// is limited to a direct use in the loop condition.
808static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
809 CondBrInst *BI = cast<CondBrInst>(ExitingBB->getTerminator());
811 // TODO: Allow non-icmp loop test.
812 if (!ICmp)
813 return false;
814
815 // TODO: Allow indirect use.
816 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
817}
818
819/// linearFunctionTestReplace policy. Return true unless we can show that the
820/// current exit test is already sufficiently canonical.
821static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
822 assert(L->getLoopLatch() && "Must be in simplified form");
823
824 // Avoid converting a constant or loop invariant test back to a runtime
825 // test. This is critical for when SCEV's cached ExitCount is less precise
826 // than the current IR (such as after we've proven a particular exit is
827 // actually dead and thus the BE count never reaches our ExitCount.)
828 CondBrInst *BI = cast<CondBrInst>(ExitingBB->getTerminator());
829 if (L->isLoopInvariant(BI->getCondition()))
830 return false;
831
832 // Do LFTR to simplify the exit condition to an ICMP.
834 if (!Cond)
835 return true;
836
837 // Do LFTR to simplify the exit ICMP to EQ/NE
838 ICmpInst::Predicate Pred = Cond->getPredicate();
839 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
840 return true;
841
842 // Look for a loop invariant RHS
843 Value *LHS = Cond->getOperand(0);
844 Value *RHS = Cond->getOperand(1);
845 if (!L->isLoopInvariant(RHS)) {
846 if (!L->isLoopInvariant(LHS))
847 return true;
848 std::swap(LHS, RHS);
849 }
850 // Look for a simple IV counter LHS
852 if (!Phi)
853 Phi = getLoopPhiForCounter(LHS, L);
854
855 if (!Phi)
856 return true;
857
858 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
859 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
860 if (Idx < 0)
861 return true;
862
863 // Do LFTR if the exit condition's IV is *not* a simple counter.
864 Value *IncV = Phi->getIncomingValue(Idx);
865 return Phi != getLoopPhiForCounter(IncV, L);
866}
867
868/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
869/// down to checking that all operands are constant and listing instructions
870/// that may hide undef.
872 unsigned Depth) {
873 if (isa<Constant>(V))
874 return !isa<UndefValue>(V);
875
876 if (Depth >= 6)
877 return false;
878
879 // Conservatively handle non-constant non-instructions. For example, Arguments
880 // may be undef.
882 if (!I)
883 return false;
884
885 // Load and return values may be undef.
886 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
887 return false;
888
889 // Optimistically handle other instructions.
890 for (Value *Op : I->operands()) {
891 if (!Visited.insert(Op).second)
892 continue;
893 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
894 return false;
895 }
896 return true;
897}
898
899/// Return true if the given value is concrete. We must prove that undef can
900/// never reach it.
901///
902/// TODO: If we decide that this is a good approach to checking for undef, we
903/// may factor it into a common location.
904static bool hasConcreteDef(Value *V) {
906 Visited.insert(V);
907 return hasConcreteDefImpl(V, Visited, 0);
908}
909
/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
913static bool isLoopCounter(PHINode* Phi, Loop *L,
914 ScalarEvolution *SE) {
915 assert(Phi->getParent() == L->getHeader());
916 assert(L->getLoopLatch());
917
918 if (!SE->isSCEVable(Phi->getType()))
919 return false;
920
921 const SCEV *S = SE->getSCEV(Phi);
923 return false;
924
925 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
926 Value *IncV = Phi->getIncomingValue(LatchIdx);
927 return (getLoopPhiForCounter(IncV, L) == Phi &&
928 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
929}
930
/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// a valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
938static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
939 const SCEV *BECount,
941 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
942
943 Value *Cond = cast<CondBrInst>(ExitingBB->getTerminator())->getCondition();
944
945 // Loop over all of the PHI nodes, looking for a simple counter.
946 PHINode *BestPhi = nullptr;
947 const SCEV *BestInit = nullptr;
948 BasicBlock *LatchBlock = L->getLoopLatch();
949 assert(LatchBlock && "Must be in simplified form");
950 const DataLayout &DL = L->getHeader()->getDataLayout();
951
952 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
953 PHINode *Phi = cast<PHINode>(I);
954 if (!isLoopCounter(Phi, L, SE))
955 continue;
956
957 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
958
959 // AR may be a pointer type, while BECount is an integer type.
960 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
961 // AR may not be a narrower type, or we may never exit.
962 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
963 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
964 continue;
965
966 // Avoid reusing a potentially undef value to compute other values that may
967 // have originally had a concrete definition.
968 if (!hasConcreteDef(Phi)) {
969 // We explicitly allow unknown phis as long as they are already used by
970 // the loop exit test. This is legal since performing LFTR could not
971 // increase the number of undef users.
972 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
973 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
974 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
975 continue;
976 }
977
978 // Avoid introducing undefined behavior due to poison which didn't exist in
979 // the original program. (Annoyingly, the rules for poison and undef
980 // propagation are distinct, so this does NOT cover the undef case above.)
981 // We have to ensure that we don't introduce UB by introducing a use on an
982 // iteration where said IV produces poison. Our strategy here differs for
983 // pointers and integer IVs. For integers, we strip and reinfer as needed,
984 // see code in linearFunctionTestReplace. For pointers, we restrict
985 // transforms as there is no good way to reinfer inbounds once lost.
986 if (!Phi->getType()->isIntegerTy() &&
987 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
988 continue;
989
990 const SCEV *Init = AR->getStart();
991
992 if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
993 // Don't force a live loop counter if another IV can be used.
994 if (isAlmostDeadIV(Phi, LatchBlock, Cond))
995 continue;
996
997 // Prefer to count-from-zero. This is a more "canonical" counter form. It
998 // also prefers integer to pointer IVs.
999 if (BestInit->isZero() != Init->isZero()) {
1000 if (BestInit->isZero())
1001 continue;
1002 }
1003 // If two IVs both count from zero or both count from nonzero then the
1004 // narrower is likely a dead phi that has been widened. Use the wider phi
1005 // to allow the other to be eliminated.
1006 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1007 continue;
1008 }
1009 BestPhi = Phi;
1010 BestInit = Init;
1011 }
1012 return BestPhi;
1013}
1014
/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
///
/// \returns the expanded limit value, or nullptr if SCEV could not produce a
/// truncated add recurrence for a narrower exit count (see bailout below).
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // For integer IVs, truncate the IV before computing the limit unless we
  // know apriori that the limit must be a constant when evaluated in the
  // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
  // IV in the loop over a (potentially) expensive expansion of the widened
  // exit count add(zext(add)) expression.
  if (IndVar->getType()->isIntegerTy() &&
      SE->getTypeSizeInBits(AR->getType()) >
      SE->getTypeSizeInBits(ExitCount->getType())) {
    const SCEV *IVInit = AR->getStart();
    // If both start and count are constants, the limit folds to a constant in
    // the wide type anyway, so no truncation is needed in that case.
    if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount)) {
      const SCEV *TruncExpr = SE->getTruncateExpr(AR, ExitCount->getType());

      // The following bailout is necessary due to the interaction with
      // the depth limit in SCEV analysis.
      if (!isa<SCEVAddRecExpr>(TruncExpr))
        return nullptr;
      AR = cast<SCEVAddRecExpr>(TruncExpr);
    }
  }

  // Evaluate either the pre-inc or post-inc recurrence at iteration ExitCount
  // and expand the resulting loop-invariant SCEV right before the exit branch.
  const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
  const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
  assert(SE->isLoopInvariant(IVLimit, L) &&
         "Computed iteration count is not loop invariant!");
  return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
                                ExitingBB->getTerminator());
}
1053
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
///
/// \returns true if the exit condition was rewritten (and the old condition
/// queued on DeadInsts), false if genLoopLimit could not produce a limit.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  Value *ExitCnt =
      genLoopLimit(IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  if (!ExitCnt)
    return false;

  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    // Only weaken flags; never set a flag SCEV did not prove.
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  CondBrInst *BI = cast<CondBrInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  // Successor 0 in-loop means the branch exits on false, so staying in the
  // loop is "IV != limit"; otherwise the polarity is flipped.
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    // If zext(trunc(IV)) == IV, the compare can be done in the wide type with
    // a zero-extended limit instead of truncating the IV inside the loop.
    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1198
1199//===----------------------------------------------------------------------===//
1200// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1201//===----------------------------------------------------------------------===//
1202
1203/// If there's a single exit block, sink any loop-invariant values that
1204/// were defined in the preheader but not used inside the loop into the
1205/// exit block to reduce register pressure in the loop.
1206bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1207 BasicBlock *ExitBlock = L->getExitBlock();
1208 if (!ExitBlock) return false;
1209
1210 BasicBlock *Preheader = L->getLoopPreheader();
1211 if (!Preheader) return false;
1212
1213 bool MadeAnyChanges = false;
1214 for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
1215
1216 // Skip BB Terminator.
1217 if (Preheader->getTerminator() == &I)
1218 continue;
1219
1220 // New instructions were inserted at the end of the preheader.
1221 if (isa<PHINode>(I))
1222 break;
1223
1224 // Don't move instructions which might have side effects, since the side
1225 // effects need to complete before instructions inside the loop. Also don't
1226 // move instructions which might read memory, since the loop may modify
1227 // memory. Note that it's okay if the instruction might have undefined
1228 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1229 // block.
1230 if (I.mayHaveSideEffects() || I.mayReadFromMemory())
1231 continue;
1232
1233 // Skip debug or pseudo instructions.
1234 if (I.isDebugOrPseudoInst())
1235 continue;
1236
1237 // Skip eh pad instructions.
1238 if (I.isEHPad())
1239 continue;
1240
1241 // Don't sink alloca: we never want to sink static alloca's out of the
1242 // entry block, and correctly sinking dynamic alloca's requires
1243 // checks for stacksave/stackrestore intrinsics.
1244 // FIXME: Refactor this check somehow?
1245 if (isa<AllocaInst>(&I))
1246 continue;
1247
1248 // Determine if there is a use in or before the loop (direct or
1249 // otherwise).
1250 bool UsedInLoop = false;
1251 for (Use &U : I.uses()) {
1252 Instruction *User = cast<Instruction>(U.getUser());
1253 BasicBlock *UseBB = User->getParent();
1254 if (PHINode *P = dyn_cast<PHINode>(User)) {
1255 unsigned i =
1257 UseBB = P->getIncomingBlock(i);
1258 }
1259 if (UseBB == Preheader || L->contains(UseBB)) {
1260 UsedInLoop = true;
1261 break;
1262 }
1263 }
1264
1265 // If there is, the def must remain in the preheader.
1266 if (UsedInLoop)
1267 continue;
1268
1269 // Otherwise, sink it to the exit block.
1270 I.moveBefore(ExitBlock->getFirstInsertionPt());
1271 SE->forgetValue(&I);
1272 MadeAnyChanges = true;
1273 }
1274
1275 return MadeAnyChanges;
1276}
1277
1278static void replaceExitCond(CondBrInst *BI, Value *NewCond,
1280 auto *OldCond = BI->getCondition();
1281 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1282 << " with " << *NewCond << "\n");
1283 BI->setCondition(NewCond);
1284 if (OldCond->use_empty())
1285 DeadInsts.emplace_back(OldCond);
1286}
1287
1288static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1289 bool IsTaken) {
1290 CondBrInst *BI = cast<CondBrInst>(ExitingBB->getTerminator());
1291 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1292 auto *OldCond = BI->getCondition();
1293 return ConstantInt::get(OldCond->getType(),
1294 IsTaken ? ExitIfTrue : !ExitIfTrue);
1295}
1296
1297static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1299 CondBrInst *BI = cast<CondBrInst>(ExitingBB->getTerminator());
1300 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1301 replaceExitCond(BI, NewCond, DeadInsts);
1302}
1303
1305 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1306 ScalarEvolution &SE) {
1307 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1308 auto *LoopPreheader = L->getLoopPreheader();
1309 auto *LoopHeader = L->getHeader();
1311 for (auto &PN : LoopHeader->phis()) {
1312 auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1313 for (User *U : PN.users())
1314 Worklist.push_back(cast<Instruction>(U));
1315 SE.forgetValue(&PN);
1316 PN.replaceAllUsesWith(PreheaderIncoming);
1317 DeadInsts.emplace_back(&PN);
1318 }
1319
1320 // Replacing with the preheader value will often allow IV users to simplify
1321 // (especially if the preheader value is a constant).
1323 while (!Worklist.empty()) {
1324 auto *I = cast<Instruction>(Worklist.pop_back_val());
1325 if (!Visited.insert(I).second)
1326 continue;
1327
1328 // Don't simplify instructions outside the loop.
1329 if (!L->contains(I))
1330 continue;
1331
1332 Value *Res = simplifyInstruction(I, I->getDataLayout());
1333 if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1334 for (User *U : I->users())
1335 Worklist.push_back(cast<Instruction>(U));
1336 I->replaceAllUsesWith(Res);
1337 DeadInsts.emplace_back(I);
1338 }
1339 }
1340}
1341
1342static Value *
1345 SCEVExpander &Rewriter) {
1346 ICmpInst::Predicate InvariantPred = LIP.Pred;
1347 BasicBlock *Preheader = L->getLoopPreheader();
1348 assert(Preheader && "Preheader doesn't exist");
1349 Rewriter.setInsertPoint(Preheader->getTerminator());
1350 auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1351 auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1352 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1353 if (ExitIfTrue)
1354 InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1355 IRBuilder<> Builder(Preheader->getTerminator());
1356 CondBrInst *BI = cast<CondBrInst>(ExitingBB->getTerminator());
1357 return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1358 BI->getCondition()->getName());
1359}
1360
1361static std::optional<Value *>
1362createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1363 const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1364 ScalarEvolution *SE, SCEVExpander &Rewriter) {
1365 CmpPredicate Pred = ICmp->getCmpPredicate();
1366 Value *LHS = ICmp->getOperand(0);
1367 Value *RHS = ICmp->getOperand(1);
1368
1369 // 'LHS pred RHS' should now mean that we stay in loop.
1370 auto *BI = cast<CondBrInst>(ExitingBB->getTerminator());
1371 if (Inverted)
1373
1374 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1375 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1376 // Can we prove it to be trivially true or false?
1377 if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1378 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1379
1380 auto *ARTy = LHSS->getType();
1381 auto *MaxIterTy = MaxIter->getType();
1382 // If possible, adjust types.
1383 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1384 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1385 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1386 const SCEV *MinusOne = SE->getMinusOne(ARTy);
1387 const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1388 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1389 MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1390 }
1391
1392 if (SkipLastIter) {
1393 // Semantically skip last iter is "subtract 1, do not bother about unsigned
1394 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1395 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1396 // So we manually construct umin(a - 1, b - 1).
1397 SmallVector<SCEVUse, 4> Elements;
1398 if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1399 for (SCEVUse Op : UMin->operands())
1400 Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1401 MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1402 } else
1403 MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1404 }
1405
1406 // Check if there is a loop-invariant predicate equivalent to our check.
1407 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1408 L, BI, MaxIter);
1409 if (!LIP)
1410 return std::nullopt;
1411
1412 // Can we prove it to be trivially true?
1413 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1414 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1415 else
1416 return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1417}
1418
1420 const Loop *L, CondBrInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1421 bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1423 assert(
1424 (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1425 "Not a loop exit!");
1426
1427 // For branch that stays in loop by TRUE condition, go through AND. For branch
1428 // that stays in loop by FALSE condition, go through OR. Both gives the
1429 // similar logic: "stay in loop iff all conditions are true(false)".
1430 bool Inverted = L->contains(BI->getSuccessor(1));
1431 SmallVector<ICmpInst *, 4> LeafConditions;
1432 SmallVector<Value *, 4> Worklist;
1434 Value *OldCond = BI->getCondition();
1435 Visited.insert(OldCond);
1436 Worklist.push_back(OldCond);
1437
1438 auto GoThrough = [&](Value *V) {
1439 Value *LHS = nullptr, *RHS = nullptr;
1440 if (Inverted) {
1441 if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1442 return false;
1443 } else {
1444 if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1445 return false;
1446 }
1447 if (Visited.insert(LHS).second)
1448 Worklist.push_back(LHS);
1449 if (Visited.insert(RHS).second)
1450 Worklist.push_back(RHS);
1451 return true;
1452 };
1453
1454 do {
1455 Value *Curr = Worklist.pop_back_val();
1456 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1457 // those with one use, to avoid instruction duplication.
1458 if (Curr->hasOneUse())
1459 if (!GoThrough(Curr))
1460 if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1461 LeafConditions.push_back(ICmp);
1462 } while (!Worklist.empty());
1463
1464 // If the current basic block has the same exit count as the whole loop, and
1465 // it consists of multiple icmp's, try to collect all icmp's that give exact
1466 // same exit count. For all other icmp's, we could use one less iteration,
1467 // because their value on the last iteration doesn't really matter.
1468 SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1469 if (!SkipLastIter && LeafConditions.size() > 1 &&
1470 SE->getExitCount(L, ExitingBB,
1472 MaxIter)
1473 for (auto *ICmp : LeafConditions) {
1474 auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1475 /*ControlsExit*/ false);
1476 const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
1477 if (isa<SCEVCouldNotCompute>(ExitMax))
1478 continue;
1479 // They could be of different types (specifically this happens after
1480 // IV widening).
1481 auto *WiderType =
1482 SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1483 const SCEV *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1484 const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1485 if (WideExitMax == WideMaxIter)
1486 ICmpsFailingOnLastIter.insert(ICmp);
1487 }
1488
1489 bool Changed = false;
1490 for (auto *OldCond : LeafConditions) {
1491 // Skip last iteration for this icmp under one of two conditions:
1492 // - We do it for all conditions;
1493 // - There is another ICmp that would fail on last iter, so this one doesn't
1494 // really matter.
1495 bool OptimisticSkipLastIter = SkipLastIter;
1496 if (!OptimisticSkipLastIter) {
1497 if (ICmpsFailingOnLastIter.size() > 1)
1498 OptimisticSkipLastIter = true;
1499 else if (ICmpsFailingOnLastIter.size() == 1)
1500 OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1501 }
1502 if (auto Replaced =
1503 createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1504 OptimisticSkipLastIter, SE, Rewriter)) {
1505 Changed = true;
1506 auto *NewCond = *Replaced;
1507 if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1508 NCI->setName(OldCond->getName() + ".first_iter");
1509 }
1510 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1511 << " with " << *NewCond << "\n");
1512 assert(OldCond->hasOneUse() && "Must be!");
1513 OldCond->replaceAllUsesWith(NewCond);
1514 DeadInsts.push_back(OldCond);
1515 // Make sure we no longer consider this condition as failing on last
1516 // iteration.
1517 ICmpsFailingOnLastIter.erase(OldCond);
1518 }
1519 }
1520 return Changed;
1521}
1522
/// Canonicalize exit conditions of the loop: first, convert signed compares of
/// a zext'd value to their unsigned form when the invariant side provably fits
/// the narrow type; second, rotate the zext out of the loop by narrowing the
/// compare (trunc of the invariant side in the preheader).
///
/// \returns true if any exit condition was changed.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative to this would be to extend the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers. The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  // Pass 1: signed compare of zext -> unsigned compare, when the invariant
  // operand's range fits in the zero-extended range of the narrow type.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<CondBrInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results. For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<CondBrInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions.  The exception is when we have an zext(add-rec).  The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant.  This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, RHS, LHSOp->getType(), "",
          L->getLoopPreheader()->getTerminator()->getIterator());
      // NewRHS is an operation that has been hoisted out of the loop, and
      // therefore should have a dropped location.
      NewRHS->setDebugLoc(DebugLoc::getDropped());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      // Samesign flag cannot be preserved after narrowing the compare.
      ICmp->setSameSign(false);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1656
1657bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1658 SmallVector<BasicBlock*, 16> ExitingBlocks;
1659 L->getExitingBlocks(ExitingBlocks);
1660
1661 // Remove all exits which aren't both rewriteable and execute on every
1662 // iteration.
1663 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1664 // If our exitting block exits multiple loops, we can only rewrite the
1665 // innermost one. Otherwise, we're changing how many times the innermost
1666 // loop runs before it exits.
1667 if (LI->getLoopFor(ExitingBB) != L)
1668 return true;
1669
1670 // Can't rewrite non-branch yet.
1671 CondBrInst *BI = dyn_cast<CondBrInst>(ExitingBB->getTerminator());
1672 if (!BI)
1673 return true;
1674
1675 // Likewise, the loop latch must be dominated by the exiting BB.
1676 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1677 return true;
1678
1679 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1680 // If already constant, nothing to do. However, if this is an
1681 // unconditional exit, we can still replace header phis with their
1682 // preheader value.
1683 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1684 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1685 return true;
1686 }
1687
1688 return false;
1689 });
1690
1691 if (ExitingBlocks.empty())
1692 return false;
1693
1694 // Get a symbolic upper bound on the loop backedge taken count.
1695 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1696 if (isa<SCEVCouldNotCompute>(MaxBECount))
1697 return false;
1698
1699 // Visit our exit blocks in order of dominance. We know from the fact that
1700 // all exits must dominate the latch, so there is a total dominance order
1701 // between them.
1702 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1703 // std::sort sorts in ascending order, so we want the inverse of
1704 // the normal dominance relation.
1705 if (A == B) return false;
1706 if (DT->properlyDominates(A, B))
1707 return true;
1708 else {
1709 assert(DT->properlyDominates(B, A) &&
1710 "expected total dominance order!");
1711 return false;
1712 }
1713 });
1714#ifdef ASSERT
1715 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1716 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1717 }
1718#endif
1719
1720 bool Changed = false;
1721 bool SkipLastIter = false;
1722 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1723 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1724 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1725 return;
1726 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1727 CurrMaxExit = MaxExitCount;
1728 else
1729 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1730 // If the loop has more than 1 iteration, all further checks will be
1731 // executed 1 iteration less.
1732 if (CurrMaxExit == MaxBECount)
1733 SkipLastIter = true;
1734 };
1735 SmallPtrSet<const SCEV *, 8> DominatingExactExitCounts;
1736 for (BasicBlock *ExitingBB : ExitingBlocks) {
1737 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1738 const SCEV *MaxExitCount = SE->getExitCount(
1739 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1740 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1741 // Okay, we do not know the exit count here. Can we at least prove that it
1742 // will remain the same within iteration space?
1743 auto *BI = cast<CondBrInst>(ExitingBB->getTerminator());
1744 auto OptimizeCond = [&](bool SkipLastIter) {
1745 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1746 MaxBECount, SkipLastIter,
1747 SE, Rewriter, DeadInsts);
1748 };
1749
1750 // TODO: We might have proved that we can skip the last iteration for
1751 // this check. In this case, we only want to check the condition on the
1752 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1753 // corner case:
1754 //
1755 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1756 //
1757 // If we could not prove that len != 0, then we also could not prove that
1758 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1759 // OptimizeCond will likely not prove anything for it, even if it could
1760 // prove the same fact for len.
1761 //
1762 // As a temporary solution, we query both last and pre-last iterations in
1763 // hope that we will be able to prove triviality for at least one of
1764 // them. We can stop querying MaxBECount for this case once SCEV
1765 // understands that (MaxBECount - 1) will not overflow here.
1766 if (OptimizeCond(false))
1767 Changed = true;
1768 else if (SkipLastIter && OptimizeCond(true))
1769 Changed = true;
1770 UpdateSkipLastIter(MaxExitCount);
1771 continue;
1772 }
1773
1774 UpdateSkipLastIter(ExactExitCount);
1775
1776 // If we know we'd exit on the first iteration, rewrite the exit to
1777 // reflect this. This does not imply the loop must exit through this
1778 // exit; there may be an earlier one taken on the first iteration.
1779 // We know that the backedge can't be taken, so we replace all
1780 // the header PHIs with values coming from the preheader.
1781 if (ExactExitCount->isZero()) {
1782 foldExit(L, ExitingBB, true, DeadInsts);
1783 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1784 Changed = true;
1785 continue;
1786 }
1787
1788 assert(ExactExitCount->getType()->isIntegerTy() &&
1789 MaxBECount->getType()->isIntegerTy() &&
1790 "Exit counts must be integers");
1791
1792 Type *WiderType =
1793 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1794 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1795 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1796 assert(MaxBECount->getType() == ExactExitCount->getType());
1797
1798 // Can we prove that some other exit must be taken strictly before this
1799 // one?
1800 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1801 ExactExitCount)) {
1802 foldExit(L, ExitingBB, false, DeadInsts);
1803 Changed = true;
1804 continue;
1805 }
1806
1807 // As we run, keep track of which exit counts we've encountered. If we
1808 // find a duplicate, we've found an exit which would have exited on the
1809 // exiting iteration, but (from the visit order) strictly follows another
1810 // which does the same and is thus dead.
1811 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1812 foldExit(L, ExitingBB, false, DeadInsts);
1813 Changed = true;
1814 continue;
1815 }
1816
1817 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1818 // here. If we kept track of the min of dominanting exits so far, we could
1819 // discharge exits with EC >= MDEC. This is less powerful than the existing
1820 // transform (since later exits aren't considered), but potentially more
1821 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1822 // or a >u b. Such a case is not currently known.
1823 }
1824 return Changed;
1825}
1826
1827static bool crashingBBWithoutEffect(const BasicBlock &BB) {
1828 return llvm::all_of(BB, [](const Instruction &I) {
1829 // TODO: for now this is overly restrictive, to make sure nothing in this
1830 // BB can depend on the loop body.
1831 // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
1832 // load does not have a side effect, but we could have
1833 // %a = load ptr, ptr %ptr
1834 // %b = load i32, ptr %a
1835 // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
1836 // dereference by skipping over loop iterations.
1837 if (const auto *CB = dyn_cast<CallBase>(&I)) {
1838 if (CB->onlyAccessesInaccessibleMemory())
1839 return true;
1840 }
1841 return isa<UnreachableInst>(I);
1842 });
1843}
1844
1845bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1846 SmallVector<BasicBlock*, 16> ExitingBlocks;
1847 L->getExitingBlocks(ExitingBlocks);
1848
1849 // Finally, see if we can rewrite our exit conditions into a loop invariant
1850 // form. If we have a read-only loop, and we can tell that we must exit down
1851 // a path which does not need any of the values computed within the loop, we
1852 // can rewrite the loop to exit on the first iteration. Note that this
1853 // doesn't either a) tell us the loop exits on the first iteration (unless
1854 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1855 // This transformation looks a lot like a restricted form of dead loop
1856 // elimination, but restricted to read-only loops and without neccesssarily
1857 // needing to kill the loop entirely.
1858 if (!LoopPredication)
1859 return false;
1860
1861 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1862 // through *explicit* control flow. We have to eliminate the possibility of
1863 // implicit exits (see below) before we know it's truly exact.
1864 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1865 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1866 return false;
1867
1868 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1869 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1870
1871 auto BadExit = [&](BasicBlock *ExitingBB) {
1872 // If our exiting block exits multiple loops, we can only rewrite the
1873 // innermost one. Otherwise, we're changing how many times the innermost
1874 // loop runs before it exits.
1875 if (LI->getLoopFor(ExitingBB) != L)
1876 return true;
1877
1878 // Can't rewrite non-branch yet.
1879 CondBrInst *BI = dyn_cast<CondBrInst>(ExitingBB->getTerminator());
1880 if (!BI)
1881 return true;
1882
1883 // If already constant, nothing to do.
1884 if (isa<Constant>(BI->getCondition()))
1885 return true;
1886
1887 // If the exit block has phis, we need to be able to compute the values
1888 // within the loop which contains them. This assumes trivially lcssa phis
1889 // have already been removed; TODO: generalize
1890 BasicBlock *ExitBlock =
1891 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1892 if (!ExitBlock->phis().empty())
1893 return true;
1894
1895 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1896 if (isa<SCEVCouldNotCompute>(ExitCount) ||
1897 !Rewriter.isSafeToExpand(ExitCount))
1898 return true;
1899
1900 assert(SE->isLoopInvariant(ExitCount, L) &&
1901 "Exit count must be loop invariant");
1902 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1903 return false;
1904 };
1905
1906 // Make sure all exits dominate the latch. This means there is a linear chain
1907 // of exits. We check this before sorting so we have a total order.
1908 BasicBlock *Latch = L->getLoopLatch();
1909 for (BasicBlock *ExitingBB : ExitingBlocks)
1910 if (!DT->dominates(ExitingBB, Latch))
1911 return false;
1912
1913 // If we have any exits which can't be predicated themselves, than we can't
1914 // predicate any exit which isn't guaranteed to execute before it. Consider
1915 // two exits (a) and (b) which would both exit on the same iteration. If we
1916 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1917 // we could convert a loop from exiting through (a) to one exiting through
1918 // (b). Note that this problem exists only for exits with the same exit
1919 // count, and we could be more aggressive when exit counts are known inequal.
1920 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1921 // llvm::sort sorts in ascending order, so we want the inverse of
1922 // the normal dominance relation.
1923 if (A == B)
1924 return false;
1925 if (DT->properlyDominates(A, B))
1926 return true;
1927 if (DT->properlyDominates(B, A))
1928 return false;
1929 llvm_unreachable("Should have total dominance order");
1930 });
1931
1932 // Make sure our exit blocks are really a total order (i.e. a linear chain of
1933 // exits before the backedge).
1934 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1935 assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
1936 "Not sorted by dominance");
1937
1938 // Given our sorted total order, we know that exit[j] must be evaluated
1939 // after all exit[i] such j > i.
1940 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1941 if (BadExit(ExitingBlocks[i])) {
1942 ExitingBlocks.resize(i);
1943 break;
1944 }
1945
1946 if (ExitingBlocks.empty())
1947 return false;
1948
1949 // At this point, ExitingBlocks consists of only those blocks which are
1950 // predicatable. Given that, we know we have at least one exit we can
1951 // predicate if the loop is doesn't have side effects and doesn't have any
1952 // implicit exits (because then our exact BTC isn't actually exact).
1953 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1954 // suggestions on how to improve this? I can obviously bail out for outer
1955 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1956 // is that enough for *all* side effects?
1957 bool HasThreadLocalSideEffects = false;
1958 for (BasicBlock *BB : L->blocks())
1959 for (auto &I : *BB) {
1960 // TODO:isGuaranteedToTransfer
1961 if (I.mayHaveSideEffects()) {
1963 return false;
1964 HasThreadLocalSideEffects = true;
1965 if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
1966 // Simple stores cannot be observed by other threads.
1967 // If HasThreadLocalSideEffects is set, we check
1968 // crashingBBWithoutEffect to make sure that the crashing BB cannot
1969 // observe them either.
1970 if (!SI->isSimple())
1971 return false;
1972 } else {
1973 return false;
1974 }
1975 }
1976
1977 // Skip if the loop has tokens referenced outside the loop to avoid
1978 // changing convergence behavior.
1979 if (I.getType()->isTokenTy()) {
1980 for (User *U : I.users()) {
1981 Instruction *UserInst = dyn_cast<Instruction>(U);
1982 if (UserInst && !L->contains(UserInst)) {
1983 return false;
1984 }
1985 }
1986 }
1987 }
1988
1989 bool Changed = false;
1990 // Finally, do the actual predication for all predicatable blocks. A couple
1991 // of notes here:
1992 // 1) We don't bother to constant fold dominated exits with identical exit
1993 // counts; that's simply a form of CSE/equality propagation and we leave
1994 // it for dedicated passes.
1995 // 2) We insert the comparison at the branch. Hoisting introduces additional
1996 // legality constraints and we leave that to dedicated logic. We want to
1997 // predicate even if we can't insert a loop invariant expression as
1998 // peeling or unrolling will likely reduce the cost of the otherwise loop
1999 // varying check.
2000 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2001 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2002 Value *ExactBTCV = nullptr; // Lazily generated if needed.
2003 for (BasicBlock *ExitingBB : ExitingBlocks) {
2004 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2005
2006 auto *BI = cast<CondBrInst>(ExitingBB->getTerminator());
2007 if (HasThreadLocalSideEffects) {
2008 const BasicBlock *Unreachable = nullptr;
2009 for (const BasicBlock *Succ : BI->successors()) {
2010 if (isa<UnreachableInst>(Succ->getTerminator()))
2011 Unreachable = Succ;
2012 }
2013 // Exit BB which have one branch back into the loop and another one to
2014 // a trap can still be optimized, because local side effects cannot
2015 // be observed in the exit case (the trap). We could be smarter about
2016 // this, but for now lets pattern match common cases that directly trap.
2017 if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
2018 return Changed;
2019 }
2020 Value *NewCond;
2021 if (ExitCount == ExactBTC) {
2022 NewCond = L->contains(BI->getSuccessor(0)) ?
2023 B.getFalse() : B.getTrue();
2024 } else {
2025 Value *ECV = Rewriter.expandCodeFor(ExitCount);
2026 if (!ExactBTCV)
2027 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2028 Value *RHS = ExactBTCV;
2029 if (ECV->getType() != RHS->getType()) {
2030 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2031 ECV = B.CreateZExt(ECV, WiderTy);
2032 RHS = B.CreateZExt(RHS, WiderTy);
2033 }
2034 auto Pred = L->contains(BI->getSuccessor(0)) ?
2035 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2036 NewCond = B.CreateICmp(Pred, ECV, RHS);
2037 }
2038 Value *OldCond = BI->getCondition();
2039 BI->setCondition(NewCond);
2040 if (OldCond->use_empty())
2041 DeadInsts.emplace_back(OldCond);
2042 Changed = true;
2043 RunUnswitching = true;
2044 }
2045
2046 return Changed;
2047}
2048
2049//===----------------------------------------------------------------------===//
2050// IndVarSimplify driver. Manage several subpasses of IV simplification.
2051//===----------------------------------------------------------------------===//
2052
/// Top-level driver: run all IndVarSimplify sub-transforms over \p L and
/// return true if any IR change was made.  Requires the loop to be in both
/// LoopSimplify and (recursive) LCSSA form; both properties are preserved.
///
/// NOTE(review): the sub-passes below are order-sensitive — simplification
/// runs before other SCEV consumers, SCEV is explicitly invalidated after
/// exit rewrites, and the rewriter cache must be cleared before dead
/// instructions are deleted.  Do not reorder without revisiting those
/// dependencies.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //   - LSR currently only supports LoopSimplify-form loops. Indvars'
  //     canonicalization can be a pessimization without LSR to "clean up"
  //     afterwards.
  //   - We depend on having a preheader; in particular,
  //     Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //     and we're in trouble if we can't find the induction variable even when
  //     we've manually inserted one.
  //   - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, "indvars");
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop.  Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<CondBrInst>(ExitingBB->getTerminator()))
        continue;

      // If our exitting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      if (!Rewriter.isSafeToExpand(ExitCount))
        continue;

      Changed |= linearFunctionTestReplace(L, ExitingBB,
                                           ExitCount, IndVar,
                                           Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}
2215
2218 LPMUpdater &) {
2219 Function *F = L.getHeader()->getParent();
2220 const DataLayout &DL = F->getDataLayout();
2221
2222 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2223 WidenIndVars && AllowIVWidening);
2224 if (!IVS.run(&L))
2225 return PreservedAnalyses::all();
2226
2227 auto PA = getLoopPassPreservedAnalyses();
2228 PA.preserveSet<CFGAnalyses>();
2229 if (IVS.runUnswitching()) {
2231 PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2232 }
2233
2234 if (AR.MSSA)
2235 PA.preserve<MemorySSAAnalysis>();
2236 return PA;
2237}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define DEBUG_TYPE
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, CondBrInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
static std::optional< FloatingPointIV > maybeFloatingPointRecurrence(Loop *L, PHINode *PN)
Analyze a PN to determine whether it represents a simple floating-point induction variable,...
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
static bool isRepresentableAsExactInteger(const APFloat &FPVal, int64_t IntVal)
Ensure we stay within the bounds of fp values that can be represented as integers without gaps,...
static void replaceLoopPHINodesWithPreheaderValues(LoopInfo *LI, Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts, ScalarEvolution &SE)
static void replaceExitCond(CondBrInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
static Value * createInvariantCond(const Loop *L, BasicBlock *ExitingBB, const ScalarEvolution::LoopInvariantPredicate &LIP, SCEVExpander &Rewriter)
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
static std::optional< Value * > createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter)
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
static Constant * createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, bool IsTaken)
static std::optional< IntegerIV > tryConvertToIntegerIV(const FloatingPointIV &FPIV)
Ensure that the floating-point IV can be converted to a semantics-preserving signed 32-bit integer IV...
static cl::opt< bool > LoopPredicationTraps("indvars-predicate-loop-traps", cl::Hidden, cl::init(true), cl::desc("Predicate conditions that trap in loops with only local writes"))
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
static void canonicalizeToIntegerIV(Loop *L, PHINode *PN, const FloatingPointIV &FPIV, const IntegerIV &IIV, const TargetLibraryInfo *TLI, std::unique_ptr< MemorySSAUpdater > &MSSAU)
Rewrite the floating-point IV as an integer IV.
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR.
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
static bool crashingBBWithoutEffect(const BasicBlock &BB)
static CmpInst::Predicate getIntegerPredicate(CmpInst::Predicate FPPred)
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:214
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:255
const fltSemantics & getSemantics() const
Definition APFloat.h:1524
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1387
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:518
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:610
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI StringRef getPredicateName(Predicate P)
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
void setCondition(Value *V)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
iterator_range< succ_iterator > successors()
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition DataLayout.h:240
static DebugLoc getDropped()
Definition DebugLoc.h:163
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction compares its operands according to the predicate given to the constructor.
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
CmpPredicate getInverseCmpPredicate() const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition LoopInfo.h:441
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This node represents a polynomial recurrence on the trip count of the specified loop.
LLVM_ABI const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
LLVM_ABI const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
LLVM_ABI bool isOne() const
Return true if the expression is a constant one.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
LLVM_ABI std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const
LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
LLVM_ABI const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates=false)
Compute the number of times the backedge of the specified loop will execute if its exit condition wer...
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI std::optional< bool > evaluatePredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Check whether the condition described by Pred, LHS, and RHS is true or false in the given Context.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
LLVM_ABI const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
@ SymbolicMaximum
An expression which provides an upper bound on the exact trip count.
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:427
bool use_empty() const
Definition Value.h:347
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Value handle that is nullable, but tries to track the Value.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
cst_pred_ty< is_one > m_scev_One()
Match an integer 1.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
std::pair< bool, bool > simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
DWARFExpression::Operation Op
constexpr U AbsoluteValue(T X)
Return the absolute value of a signed integer, converted to the corresponding unsigned integer type.
Definition MathExtras.h:592
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1917
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
LLVM_ABI bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition Local.cpp:643
@ UnusedIndVarInLoop
Definition LoopUtils.h:569
@ OnlyCheapRepl
Definition LoopUtils.h:567
@ NeverRepl
Definition LoopUtils.h:566
@ NoHardUse
Definition LoopUtils.h:568
@ AlwaysRepl
Definition LoopUtils.h:570
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
Represents a floating-point induction variable pattern that may be convertible to integer form.
FloatingPointIV(APFloat Init, APFloat Incr, APFloat Exit, FCmpInst *Compare, BinaryOperator *Add)
BinaryOperator * Add
Represents the integer values for a converted IV.
int64_t InitValue
int64_t ExitValue
int64_t IncrValue
CmpInst::Predicate NewPred
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A marker analysis to determine if SimpleLoopUnswitch should run again on a given loop.
Collect information about induction variables that are used by sign/zero extend operations.