// Source: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (LLVM 17.0.0git)
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

85using namespace llvm;
86using namespace PatternMatch;
87
88#define DEBUG_TYPE "indvars"
89
90STATISTIC(NumWidened , "Number of indvars widened");
91STATISTIC(NumReplaced , "Number of exit values replaced");
92STATISTIC(NumLFTR , "Number of loop exit tests replaced");
93STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
94STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
95
96// Trip count verification can be enabled by default under NDEBUG if we
97// implement a strong expression equivalence checker in SCEV. Until then, we
98// use the verify-indvars flag, which may assert in some cases.
100 "verify-indvars", cl::Hidden,
101 cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
102 "effect in release builds. (Note: this adds additional SCEV "
103 "queries potentially changing the analysis result)"));
104
106 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
107 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
109 clEnumValN(NeverRepl, "never", "never replace exit value"),
110 clEnumValN(OnlyCheapRepl, "cheap",
111 "only replace exit value when the cost is cheap"),
113 UnusedIndVarInLoop, "unusedindvarinloop",
114 "only replace exit value when it is an unused "
115 "induction variable in the loop and has cheap replacement cost"),
116 clEnumValN(NoHardUse, "noharduse",
117 "only replace exit values when loop def likely dead"),
118 clEnumValN(AlwaysRepl, "always",
119 "always replace exit value whenever possible")));
120
122 "indvars-post-increment-ranges", cl::Hidden,
123 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
124 cl::init(true));
125
126static cl::opt<bool>
127DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
128 cl::desc("Disable Linear Function Test Replace optimization"));
129
130static cl::opt<bool>
131LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
132 cl::desc("Predicate conditions in read only loops"));
133
134static cl::opt<bool>
135AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
136 cl::desc("Allow widening of indvars to eliminate s/zext"));
137
138namespace {
139
140class IndVarSimplify {
141 LoopInfo *LI;
142 ScalarEvolution *SE;
143 DominatorTree *DT;
144 const DataLayout &DL;
147 std::unique_ptr<MemorySSAUpdater> MSSAU;
148
150 bool WidenIndVars;
151
152 bool handleFloatingPointIV(Loop *L, PHINode *PH);
153 bool rewriteNonIntegerIVs(Loop *L);
154
155 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
156 /// Try to improve our exit conditions by converting condition from signed
157 /// to unsigned or rotating computation out of the loop.
158 /// (See inline comment about why this is duplicated from simplifyAndExtend)
159 bool canonicalizeExitCondition(Loop *L);
160 /// Try to eliminate loop exits based on analyzeable exit counts
161 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
162 /// Try to form loop invariant tests for loop exits by changing how many
163 /// iterations of the loop run when that is unobservable.
164 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
165
166 bool rewriteFirstIterationLoopExitValues(Loop *L);
167
168 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
169 const SCEV *ExitCount,
170 PHINode *IndVar, SCEVExpander &Rewriter);
171
172 bool sinkUnusedInvariants(Loop *L);
173
174public:
175 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
176 const DataLayout &DL, TargetLibraryInfo *TLI,
177 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
178 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
179 WidenIndVars(WidenIndVars) {
180 if (MSSA)
181 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
182 }
183
184 bool run(Loop *L);
185};
186
187} // end anonymous namespace
188
//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

193/// Convert APF to an integer, if possible.
194static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
195 bool isExact = false;
196 // See if we can convert this to an int64_t
197 uint64_t UIntVal;
198 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
199 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
200 !isExact)
201 return false;
202 IntVal = UIntVal;
203 return true;
204}
205
206/// If the loop has floating induction variable then insert corresponding
207/// integer induction variable if possible.
208/// For example,
209/// for(double i = 0; i < 10000; ++i)
210/// bar(i)
211/// is converted into
212/// for(int i = 0; i < 10000; ++i)
213/// bar((double)i);
214bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
215 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
216 unsigned BackEdge = IncomingEdge^1;
217
218 // Check incoming value.
219 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
220
221 int64_t InitValue;
222 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
223 return false;
224
225 // Check IV increment. Reject this PN if increment operation is not
226 // an add or increment value can not be represented by an integer.
227 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
228 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
229
230 // If this is not an add of the PHI with a constantfp, or if the constant fp
231 // is not an integer, bail out.
232 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
233 int64_t IncValue;
234 if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
235 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
236 return false;
237
238 // Check Incr uses. One user is PN and the other user is an exit condition
239 // used by the conditional terminator.
240 Value::user_iterator IncrUse = Incr->user_begin();
241 Instruction *U1 = cast<Instruction>(*IncrUse++);
242 if (IncrUse == Incr->user_end()) return false;
243 Instruction *U2 = cast<Instruction>(*IncrUse++);
244 if (IncrUse != Incr->user_end()) return false;
245
246 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
247 // only used by a branch, we can't transform it.
248 FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
249 if (!Compare)
250 Compare = dyn_cast<FCmpInst>(U2);
251 if (!Compare || !Compare->hasOneUse() ||
252 !isa<BranchInst>(Compare->user_back()))
253 return false;
254
255 BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
256
257 // We need to verify that the branch actually controls the iteration count
258 // of the loop. If not, the new IV can overflow and no one will notice.
259 // The branch block must be in the loop and one of the successors must be out
260 // of the loop.
261 assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
262 if (!L->contains(TheBr->getParent()) ||
263 (L->contains(TheBr->getSuccessor(0)) &&
264 L->contains(TheBr->getSuccessor(1))))
265 return false;
266
267 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
268 // transform it.
269 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
270 int64_t ExitValue;
271 if (ExitValueVal == nullptr ||
272 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
273 return false;
274
275 // Find new predicate for integer comparison.
277 switch (Compare->getPredicate()) {
278 default: return false; // Unknown comparison.
280 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
282 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
284 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
286 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
288 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
290 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
291 }
292
293 // We convert the floating point induction variable to a signed i32 value if
294 // we can. This is only safe if the comparison will not overflow in a way
295 // that won't be trapped by the integer equivalent operations. Check for this
296 // now.
297 // TODO: We could use i64 if it is native and the range requires it.
298
299 // The start/stride/exit values must all fit in signed i32.
300 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
301 return false;
302
303 // If not actually striding (add x, 0.0), avoid touching the code.
304 if (IncValue == 0)
305 return false;
306
307 // Positive and negative strides have different safety conditions.
308 if (IncValue > 0) {
309 // If we have a positive stride, we require the init to be less than the
310 // exit value.
311 if (InitValue >= ExitValue)
312 return false;
313
314 uint32_t Range = uint32_t(ExitValue-InitValue);
315 // Check for infinite loop, either:
316 // while (i <= Exit) or until (i > Exit)
317 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
318 if (++Range == 0) return false; // Range overflows.
319 }
320
321 unsigned Leftover = Range % uint32_t(IncValue);
322
323 // If this is an equality comparison, we require that the strided value
324 // exactly land on the exit value, otherwise the IV condition will wrap
325 // around and do things the fp IV wouldn't.
326 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
327 Leftover != 0)
328 return false;
329
330 // If the stride would wrap around the i32 before exiting, we can't
331 // transform the IV.
332 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
333 return false;
334 } else {
335 // If we have a negative stride, we require the init to be greater than the
336 // exit value.
337 if (InitValue <= ExitValue)
338 return false;
339
340 uint32_t Range = uint32_t(InitValue-ExitValue);
341 // Check for infinite loop, either:
342 // while (i >= Exit) or until (i < Exit)
343 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
344 if (++Range == 0) return false; // Range overflows.
345 }
346
347 unsigned Leftover = Range % uint32_t(-IncValue);
348
349 // If this is an equality comparison, we require that the strided value
350 // exactly land on the exit value, otherwise the IV condition will wrap
351 // around and do things the fp IV wouldn't.
352 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
353 Leftover != 0)
354 return false;
355
356 // If the stride would wrap around the i32 before exiting, we can't
357 // transform the IV.
358 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
359 return false;
360 }
361
363
364 // Insert new integer induction variable.
365 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
366 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
367 PN->getIncomingBlock(IncomingEdge));
368
369 Value *NewAdd =
370 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
371 Incr->getName()+".int", Incr);
372 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
373
374 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
375 ConstantInt::get(Int32Ty, ExitValue),
376 Compare->getName());
377
378 // In the following deletions, PN may become dead and may be deleted.
379 // Use a WeakTrackingVH to observe whether this happens.
380 WeakTrackingVH WeakPH = PN;
381
382 // Delete the old floating point exit comparison. The branch starts using the
383 // new comparison.
384 NewCompare->takeName(Compare);
385 Compare->replaceAllUsesWith(NewCompare);
386 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
387
388 // Delete the old floating point increment.
389 Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
390 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
391
392 // If the FP induction variable still has uses, this is because something else
393 // in the loop uses its value. In order to canonicalize the induction
394 // variable, we chose to eliminate the IV and rewrite it in terms of an
395 // int->fp cast.
396 //
397 // We give preference to sitofp over uitofp because it is faster on most
398 // platforms.
399 if (WeakPH) {
400 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
401 &*PN->getParent()->getFirstInsertionPt());
402 PN->replaceAllUsesWith(Conv);
403 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
404 }
405 return true;
406}
407
408bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
409 // First step. Check to see if there are any floating-point recurrences.
410 // If there are, change them into integer recurrences, permitting analysis by
411 // the SCEV routines.
412 BasicBlock *Header = L->getHeader();
413
415 for (PHINode &PN : Header->phis())
416 PHIs.push_back(&PN);
417
418 bool Changed = false;
419 for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
420 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
421 Changed |= handleFloatingPointIV(L, PN);
422
423 // If the loop previously had floating-point IV, ScalarEvolution
424 // may not have been able to compute a trip count. Now that we've done some
425 // re-writing, the trip count may be computable.
426 if (Changed)
427 SE->forgetLoop(L);
428 return Changed;
429}
430
//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

436/// Check to see if this loop has loop invariant conditions which lead to loop
437/// exits. If so, we know that if the exit path is taken, it is at the first
438/// loop iteration. This lets us predict exit values of PHI nodes that live in
439/// loop header.
440bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
441 // Verify the input to the pass is already in LCSSA form.
442 assert(L->isLCSSAForm(*DT));
443
445 L->getUniqueExitBlocks(ExitBlocks);
446
447 bool MadeAnyChanges = false;
448 for (auto *ExitBB : ExitBlocks) {
449 // If there are no more PHI nodes in this exit block, then no more
450 // values defined inside the loop are used on this path.
451 for (PHINode &PN : ExitBB->phis()) {
452 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
453 IncomingValIdx != E; ++IncomingValIdx) {
454 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
455
456 // Can we prove that the exit must run on the first iteration if it
457 // runs at all? (i.e. early exits are fine for our purposes, but
458 // traces which lead to this exit being taken on the 2nd iteration
459 // aren't.) Note that this is about whether the exit branch is
460 // executed, not about whether it is taken.
461 if (!L->getLoopLatch() ||
462 !DT->dominates(IncomingBB, L->getLoopLatch()))
463 continue;
464
465 // Get condition that leads to the exit path.
466 auto *TermInst = IncomingBB->getTerminator();
467
468 Value *Cond = nullptr;
469 if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
470 // Must be a conditional branch, otherwise the block
471 // should not be in the loop.
472 Cond = BI->getCondition();
473 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
474 Cond = SI->getCondition();
475 else
476 continue;
477
478 if (!L->isLoopInvariant(Cond))
479 continue;
480
481 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
482
483 // Only deal with PHIs in the loop header.
484 if (!ExitVal || ExitVal->getParent() != L->getHeader())
485 continue;
486
487 // If ExitVal is a PHI on the loop header, then we know its
488 // value along this exit because the exit can only be taken
489 // on the first iteration.
490 auto *LoopPreheader = L->getLoopPreheader();
491 assert(LoopPreheader && "Invalid loop");
492 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
493 if (PreheaderIdx != -1) {
494 assert(ExitVal->getParent() == L->getHeader() &&
495 "ExitVal must be in loop header");
496 MadeAnyChanges = true;
497 PN.setIncomingValue(IncomingValIdx,
498 ExitVal->getIncomingValue(PreheaderIdx));
499 SE->forgetValue(&PN);
500 }
501 }
502 }
503 }
504 return MadeAnyChanges;
505}
506
//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

511/// Update information about the induction variable that is extended by this
512/// sign or zero extend operation. This is used to determine the final width of
513/// the IV before actually widening it.
514static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
515 ScalarEvolution *SE,
516 const TargetTransformInfo *TTI) {
517 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
518 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
519 return;
520
521 Type *Ty = Cast->getType();
522 uint64_t Width = SE->getTypeSizeInBits(Ty);
523 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
524 return;
525
526 // Check that `Cast` actually extends the induction variable (we rely on this
527 // later). This takes care of cases where `Cast` is extending a truncation of
528 // the narrow induction variable, and thus can end up being narrower than the
529 // "narrow" induction variable.
530 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
531 if (NarrowIVWidth >= Width)
532 return;
533
534 // Cast is either an sext or zext up to this point.
535 // We should not widen an indvar if arithmetics on the wider indvar are more
536 // expensive than those on the narrower indvar. We check only the cost of ADD
537 // because at least an ADD is required to increment the induction variable. We
538 // could compute more comprehensively the cost of all instructions on the
539 // induction variable when necessary.
540 if (TTI &&
541 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
542 TTI->getArithmeticInstrCost(Instruction::Add,
543 Cast->getOperand(0)->getType())) {
544 return;
545 }
546
547 if (!WI.WidestNativeType ||
548 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
550 WI.IsSigned = IsSigned;
551 return;
552 }
553
554 // We extend the IV to satisfy the sign of its user(s), or 'signed'
555 // if there are multiple users with both sign- and zero extensions,
556 // in order not to introduce nondeterministic behaviour based on the
557 // unspecified order of a PHI nodes' users-iterator.
558 WI.IsSigned |= IsSigned;
559}
560
//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

569namespace {
570
571class IndVarSimplifyVisitor : public IVVisitor {
572 ScalarEvolution *SE;
574 PHINode *IVPhi;
575
576public:
577 WideIVInfo WI;
578
579 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
581 const DominatorTree *DTree)
582 : SE(SCEV), TTI(TTI), IVPhi(IV) {
583 DT = DTree;
584 WI.NarrowIV = IVPhi;
585 }
586
587 // Implement the interface used by simplifyUsersOfIV.
588 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
589};
590
591} // end anonymous namespace
592
593/// Iteratively perform simplification on a worklist of IV users. Each
594/// successive simplification may push more users which may themselves be
595/// candidates for simplification.
596///
597/// Sign/Zero extend elimination is interleaved with IV simplification.
598bool IndVarSimplify::simplifyAndExtend(Loop *L,
599 SCEVExpander &Rewriter,
600 LoopInfo *LI) {
602
603 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
604 Intrinsic::getName(Intrinsic::experimental_guard));
605 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
606
608 for (PHINode &PN : L->getHeader()->phis())
609 LoopPhis.push_back(&PN);
610
611 // Each round of simplification iterates through the SimplifyIVUsers worklist
612 // for all current phis, then determines whether any IVs can be
613 // widened. Widening adds new phis to LoopPhis, inducing another round of
614 // simplification on the wide IVs.
615 bool Changed = false;
616 while (!LoopPhis.empty()) {
617 // Evaluate as many IV expressions as possible before widening any IVs. This
618 // forces SCEV to set no-wrap flags before evaluating sign/zero
619 // extension. The first time SCEV attempts to normalize sign/zero extension,
620 // the result becomes final. So for the most predictable results, we delay
621 // evaluation of sign/zero extend evaluation until needed, and avoid running
622 // other SCEV based analysis prior to simplifyAndExtend.
623 do {
624 PHINode *CurrIV = LoopPhis.pop_back_val();
625
626 // Information about sign/zero extensions of CurrIV.
627 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
628
629 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
630 &Visitor);
631
632 if (Visitor.WI.WidestNativeType) {
633 WideIVs.push_back(Visitor.WI);
634 }
635 } while(!LoopPhis.empty());
636
637 // Continue if we disallowed widening.
638 if (!WidenIndVars)
639 continue;
640
641 for (; !WideIVs.empty(); WideIVs.pop_back()) {
642 unsigned ElimExt;
643 unsigned Widened;
644 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
645 DT, DeadInsts, ElimExt, Widened,
646 HasGuards, UsePostIncrementRanges)) {
647 NumElimExt += ElimExt;
648 NumWidened += Widened;
649 Changed = true;
650 LoopPhis.push_back(WidePhi);
651 }
652 }
653 }
654 return Changed;
655}
656
//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

661/// Given an Value which is hoped to be part of an add recurance in the given
662/// loop, return the associated Phi node if so. Otherwise, return null. Note
663/// that this is less general than SCEVs AddRec checking.
665 Instruction *IncI = dyn_cast<Instruction>(IncV);
666 if (!IncI)
667 return nullptr;
668
669 switch (IncI->getOpcode()) {
670 case Instruction::Add:
671 case Instruction::Sub:
672 break;
673 case Instruction::GetElementPtr:
674 // An IV counter must preserve its type.
675 if (IncI->getNumOperands() == 2)
676 break;
677 [[fallthrough]];
678 default:
679 return nullptr;
680 }
681
682 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
683 if (Phi && Phi->getParent() == L->getHeader()) {
684 if (L->isLoopInvariant(IncI->getOperand(1)))
685 return Phi;
686 return nullptr;
687 }
688 if (IncI->getOpcode() == Instruction::GetElementPtr)
689 return nullptr;
690
691 // Allow add/sub to be commuted.
692 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
693 if (Phi && Phi->getParent() == L->getHeader()) {
694 if (L->isLoopInvariant(IncI->getOperand(0)))
695 return Phi;
696 }
697 return nullptr;
698}
699
700/// Whether the current loop exit test is based on this value. Currently this
701/// is limited to a direct use in the loop condition.
702static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
703 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
704 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
705 // TODO: Allow non-icmp loop test.
706 if (!ICmp)
707 return false;
708
709 // TODO: Allow indirect use.
710 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
711}
712
713/// linearFunctionTestReplace policy. Return true unless we can show that the
714/// current exit test is already sufficiently canonical.
715static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
716 assert(L->getLoopLatch() && "Must be in simplified form");
717
718 // Avoid converting a constant or loop invariant test back to a runtime
719 // test. This is critical for when SCEV's cached ExitCount is less precise
720 // than the current IR (such as after we've proven a particular exit is
721 // actually dead and thus the BE count never reaches our ExitCount.)
722 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
723 if (L->isLoopInvariant(BI->getCondition()))
724 return false;
725
726 // Do LFTR to simplify the exit condition to an ICMP.
727 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
728 if (!Cond)
729 return true;
730
731 // Do LFTR to simplify the exit ICMP to EQ/NE
732 ICmpInst::Predicate Pred = Cond->getPredicate();
733 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
734 return true;
735
736 // Look for a loop invariant RHS
737 Value *LHS = Cond->getOperand(0);
738 Value *RHS = Cond->getOperand(1);
739 if (!L->isLoopInvariant(RHS)) {
740 if (!L->isLoopInvariant(LHS))
741 return true;
742 std::swap(LHS, RHS);
743 }
744 // Look for a simple IV counter LHS
745 PHINode *Phi = dyn_cast<PHINode>(LHS);
746 if (!Phi)
747 Phi = getLoopPhiForCounter(LHS, L);
748
749 if (!Phi)
750 return true;
751
752 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
753 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
754 if (Idx < 0)
755 return true;
756
757 // Do LFTR if the exit condition's IV is *not* a simple counter.
758 Value *IncV = Phi->getIncomingValue(Idx);
759 return Phi != getLoopPhiForCounter(IncV, L);
760}
761
762/// Return true if undefined behavior would provable be executed on the path to
763/// OnPathTo if Root produced a posion result. Note that this doesn't say
764/// anything about whether OnPathTo is actually executed or whether Root is
765/// actually poison. This can be used to assess whether a new use of Root can
766/// be added at a location which is control equivalent with OnPathTo (such as
767/// immediately before it) without introducing UB which didn't previously
768/// exist. Note that a false result conveys no information.
770 Instruction *OnPathTo,
771 DominatorTree *DT) {
772 // Basic approach is to assume Root is poison, propagate poison forward
773 // through all users we can easily track, and then check whether any of those
774 // users are provable UB and must execute before out exiting block might
775 // exit.
776
777 // The set of all recursive users we've visited (which are assumed to all be
778 // poison because of said visit)
779 SmallSet<const Value *, 16> KnownPoison;
781 Worklist.push_back(Root);
782 while (!Worklist.empty()) {
783 const Instruction *I = Worklist.pop_back_val();
784
785 // If we know this must trigger UB on a path leading our target.
786 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
787 return true;
788
789 // If we can't analyze propagation through this instruction, just skip it
790 // and transitive users. Safe as false is a conservative result.
791 if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
792 return KnownPoison.contains(U) && propagatesPoison(U);
793 }))
794 continue;
795
796 if (KnownPoison.insert(I).second)
797 for (const User *User : I->users())
798 Worklist.push_back(cast<Instruction>(User));
799 }
800
801 // Might be non-UB, or might have a path we couldn't prove must execute on
802 // way to exiting bb.
803 return false;
804}
805
806/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
807/// down to checking that all operands are constant and listing instructions
808/// that may hide undef.
810 unsigned Depth) {
811 if (isa<Constant>(V))
812 return !isa<UndefValue>(V);
813
814 if (Depth >= 6)
815 return false;
816
817 // Conservatively handle non-constant non-instructions. For example, Arguments
818 // may be undef.
819 Instruction *I = dyn_cast<Instruction>(V);
820 if (!I)
821 return false;
822
823 // Load and return values may be undef.
824 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
825 return false;
826
827 // Optimistically handle other instructions.
828 for (Value *Op : I->operands()) {
829 if (!Visited.insert(Op).second)
830 continue;
831 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
832 return false;
833 }
834 return true;
835}
836
837/// Return true if the given value is concrete. We must prove that undef can
838/// never reach it.
839///
840/// TODO: If we decide that this is a good approach to checking for undef, we
841/// may factor it into a common location.
842static bool hasConcreteDef(Value *V) {
844 Visited.insert(V);
845 return hasConcreteDefImpl(V, Visited, 0);
846}
847
848/// Return true if this IV has any uses other than the (soon to be rewritten)
849/// loop exit test.
850static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
851 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
852 Value *IncV = Phi->getIncomingValue(LatchIdx);
853
854 for (User *U : Phi->users())
855 if (U != Cond && U != IncV) return false;
856
857 for (User *U : IncV->users())
858 if (U != Cond && U != Phi) return false;
859 return true;
860}
861
862/// Return true if the given phi is a "counter" in L. A counter is an
863/// add recurance (of integer or pointer type) with an arbitrary start, and a
864/// step of 1. Note that L must have exactly one latch.
865static bool isLoopCounter(PHINode* Phi, Loop *L,
866 ScalarEvolution *SE) {
867 assert(Phi->getParent() == L->getHeader());
868 assert(L->getLoopLatch());
869
870 if (!SE->isSCEVable(Phi->getType()))
871 return false;
872
873 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
874 if (!AR || AR->getLoop() != L || !AR->isAffine())
875 return false;
876
877 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
878 if (!Step || !Step->isOne())
879 return false;
880
881 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
882 Value *IncV = Phi->getIncomingValue(LatchIdx);
883 return (getLoopPhiForCounter(IncV, L) == Phi &&
884 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
885}
886
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitable heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                // NOTE(review): the rest of this signature
                                // (upstream: "ScalarEvolution *SE,
                                // DominatorTree *DT) {") was lost in
                                // extraction.
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  // Header phis are grouped at the top of the block; stop at the first
  // non-phi instruction.
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
974
/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
///
/// The expansion is emitted immediately before the terminator of ExitingBB.
/// If UsePostInc is set, the limit is computed for the post-incremented IV,
/// i.e. one additional step is folded into the limit expression.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know apriori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
1055
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
///
/// \returns true unconditionally — once called, the rewrite is always
/// performed (see the single return at the bottom).
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
         IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  // NOTE(review): a declaration (upstream: "ICmpInst::Predicate P;") was lost
  // in extraction here; P is assigned on both paths below.
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1198
1199//===----------------------------------------------------------------------===//
1200// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1201//===----------------------------------------------------------------------===//
1202
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
///
/// \returns true if at least one instruction was moved.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader bottom-up, starting just above the terminator.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A phi use counts as a use in its incoming block, not the phi's block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
        // NOTE(review): the initializer (upstream:
        // "PHINode::getIncomingValueNumForOperand(U.getOperandNo());") was
        // lost in extraction here.
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Back up past debug/pseudo instructions to find the next candidate; if
    // only such instructions remain, we are done after this move.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    // Keep subsequent sunk instructions ordered before the ones already moved.
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
1292
/// Replace the condition of the loop-exiting branch BI with NewCond, queueing
/// the old condition for deletion once it has no remaining uses.
static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            // NOTE(review): the remainder of this signature
                            // (upstream: "SmallVectorImpl<WeakTrackingVH>
                            // &DeadInsts) {") was lost in extraction.
  auto *OldCond = BI->getCondition();
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
                    << " with " << *NewCond << "\n");
  BI->setCondition(NewCond);
  // Only queue the old condition if nothing else still uses it.
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}
1302
1303static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1304 bool IsTaken) {
1305 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1306 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1307 auto *OldCond = BI->getCondition();
1308 return ConstantInt::get(OldCond->getType(),
1309 IsTaken ? ExitIfTrue : !ExitIfTrue);
1310}
1311
/// Fold the exit test of ExitingBB to a constant so that the exit is either
/// always (IsTaken) or never (!IsTaken) taken.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     // NOTE(review): the remainder of this signature
                     // (upstream: "SmallVectorImpl<WeakTrackingVH>
                     // &DeadInsts) {") was lost in extraction.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
  replaceExitCond(BI, NewCond, DeadInsts);
}
1318
    // NOTE(review): the first line of this definition (upstream:
    // "static void replaceLoopPHINodesWithPreheaderValues(") was lost in
    // extraction.
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
    ScalarEvolution &SE) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  // NOTE(review): a worklist declaration (upstream:
  // "SmallVector<Instruction *> Worklist;") was lost in extraction here.
  // Replace each header phi with its preheader incoming value — presumably
  // callers only invoke this when the backedge value can never matter; verify
  // against call sites.
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    for (User *U : PN.users())
      Worklist.push_back(cast<Instruction>(U));
    SE.forgetValue(&PN);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }

  // Replacing with the preheader value will often allow IV users to simplify
  // (especially if the preheader value is a constant).
  // NOTE(review): a visited-set declaration (upstream:
  // "SmallPtrSet<Instruction *, 8> Visited;") was lost in extraction here.
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      // Users of the simplified instruction may simplify in turn.
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}
1356
/// Materialize the loop-invariant predicate LIP in the preheader and return
/// an icmp usable as the exit condition of ExitingBB.
static Value *
// NOTE(review): two signature lines (upstream:
// "createInvariantCond(const Loop *L, BasicBlock *ExitingBB,"
// "                    const ScalarEvolution::LoopInvariantPredicate &LIP,")
// were lost in extraction here.
                         SCEVExpander &Rewriter) {
  ICmpInst::Predicate InvariantPred = LIP.Pred;
  BasicBlock *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader doesn't exist");
  // Expand both sides of the invariant predicate in the preheader.
  Rewriter.setInsertPoint(Preheader->getTerminator());
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
  // Invert the predicate when the branch exits the loop on its true edge.
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(Preheader->getTerminator());
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                            BI->getCondition()->getName());
}
1375
/// Try to replace the exit condition ICmp of ExitingBB with either a constant
/// (when it can be proven trivially true/false) or a check that is invariant
/// over the first MaxIter iterations.  Returns std::nullopt when no
/// replacement can be proven.
static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    // Truncating MaxIter is only safe when it provably fits in the narrower
    // type.
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    // NOTE(review): a declaration (upstream:
    // "SmallVector<const SCEV *, 4> Elements;") was lost in extraction here.
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (auto *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}
1433
    // NOTE(review): the first line of this definition (upstream:
    // "static bool optimizeLoopExitWithUnknownExitCount(") was lost in
    // extraction.
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    // NOTE(review): the last parameter (upstream:
    // "SmallVectorImpl<WeakTrackingVH> &DeadInsts) {") was lost in
    // extraction.
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For branch that stays in loop by TRUE condition, go through AND. For branch
  // that stays in loop by FALSE condition, go through OR. Both gives the
  // similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  // NOTE(review): a visited-set declaration (upstream:
  // "SmallPtrSet<Value *, 4> Visited;") was lost in extraction here.
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  // Push both operands of a logical and/or (matching the loop's polarity)
  // onto the worklist; returns false if V is not such an operation.
  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exact
  // same exit count. For all other icmp's, we could use one less iteration,
  // because their value on the last iteration doesn't really matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      auto *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on last iter, so this one doesn't
    // really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on last
      // iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}
1537
/// Turn signed exit compares over a zext'd value into their unsigned
/// equivalents where the invariant operand provably fits, then try to rotate
/// the zext out of loop-variant unsigned exit compares.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers. The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results. For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions. The exception is when we have an zext(add-rec). The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS =
          CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
          // NOTE(review): the insertion-point argument (upstream:
          // "L->getLoopPreheader()->getTerminator());") was lost in
          // extraction here.
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      // The old zext may now be dead.
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };


    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1671
1672bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1673 SmallVector<BasicBlock*, 16> ExitingBlocks;
1674 L->getExitingBlocks(ExitingBlocks);
1675
1676 // Remove all exits which aren't both rewriteable and execute on every
1677 // iteration.
1678 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1679 // If our exitting block exits multiple loops, we can only rewrite the
1680 // innermost one. Otherwise, we're changing how many times the innermost
1681 // loop runs before it exits.
1682 if (LI->getLoopFor(ExitingBB) != L)
1683 return true;
1684
1685 // Can't rewrite non-branch yet.
1686 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1687 if (!BI)
1688 return true;
1689
1690 // Likewise, the loop latch must be dominated by the exiting BB.
1691 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1692 return true;
1693
1694 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1695 // If already constant, nothing to do. However, if this is an
1696 // unconditional exit, we can still replace header phis with their
1697 // preheader value.
1698 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1699 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1700 return true;
1701 }
1702
1703 return false;
1704 });
1705
1706 if (ExitingBlocks.empty())
1707 return false;
1708
1709 // Get a symbolic upper bound on the loop backedge taken count.
1710 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1711 if (isa<SCEVCouldNotCompute>(MaxBECount))
1712 return false;
1713
1714 // Visit our exit blocks in order of dominance. We know from the fact that
1715 // all exits must dominate the latch, so there is a total dominance order
1716 // between them.
1717 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1718 // std::sort sorts in ascending order, so we want the inverse of
1719 // the normal dominance relation.
1720 if (A == B) return false;
1721 if (DT->properlyDominates(A, B))
1722 return true;
1723 else {
1724 assert(DT->properlyDominates(B, A) &&
1725 "expected total dominance order!");
1726 return false;
1727 }
1728 });
1729#ifdef ASSERT
1730 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1731 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1732 }
1733#endif
1734
1735 bool Changed = false;
1736 bool SkipLastIter = false;
1737 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1738 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1739 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1740 return;
1741 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1742 CurrMaxExit = MaxExitCount;
1743 else
1744 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1745 // If the loop has more than 1 iteration, all further checks will be
1746 // executed 1 iteration less.
1747 if (CurrMaxExit == MaxBECount)
1748 SkipLastIter = true;
1749 };
1750 SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1751 for (BasicBlock *ExitingBB : ExitingBlocks) {
1752 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1753 const SCEV *MaxExitCount = SE->getExitCount(
1754 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1755 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1756 // Okay, we do not know the exit count here. Can we at least prove that it
1757 // will remain the same within iteration space?
1758 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1759 auto OptimizeCond = [&](bool SkipLastIter) {
1760 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1761 MaxBECount, SkipLastIter,
1762 SE, Rewriter, DeadInsts);
1763 };
1764
1765 // TODO: We might have proved that we can skip the last iteration for
1766 // this check. In this case, we only want to check the condition on the
1767 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1768 // corner case:
1769 //
1770 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1771 //
1772 // If we could not prove that len != 0, then we also could not prove that
1773 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1774 // OptimizeCond will likely not prove anything for it, even if it could
1775 // prove the same fact for len.
1776 //
1777 // As a temporary solution, we query both last and pre-last iterations in
1778 // hope that we will be able to prove triviality for at least one of
1779 // them. We can stop querying MaxBECount for this case once SCEV
1780 // understands that (MaxBECount - 1) will not overflow here.
1781 if (OptimizeCond(false))
1782 Changed = true;
1783 else if (SkipLastIter && OptimizeCond(true))
1784 Changed = true;
1785 UpdateSkipLastIter(MaxExitCount);
1786 continue;
1787 }
1788
1789 UpdateSkipLastIter(ExactExitCount);
1790
1791 // If we know we'd exit on the first iteration, rewrite the exit to
1792 // reflect this. This does not imply the loop must exit through this
1793 // exit; there may be an earlier one taken on the first iteration.
1794 // We know that the backedge can't be taken, so we replace all
1795 // the header PHIs with values coming from the preheader.
1796 if (ExactExitCount->isZero()) {
1797 foldExit(L, ExitingBB, true, DeadInsts);
1798 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1799 Changed = true;
1800 continue;
1801 }
1802
1803 assert(ExactExitCount->getType()->isIntegerTy() &&
1804 MaxBECount->getType()->isIntegerTy() &&
1805 "Exit counts must be integers");
1806
1807 Type *WiderType =
1808 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1809 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1810 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1811 assert(MaxBECount->getType() == ExactExitCount->getType());
1812
1813 // Can we prove that some other exit must be taken strictly before this
1814 // one?
1815 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1816 ExactExitCount)) {
1817 foldExit(L, ExitingBB, false, DeadInsts);
1818 Changed = true;
1819 continue;
1820 }
1821
1822 // As we run, keep track of which exit counts we've encountered. If we
1823 // find a duplicate, we've found an exit which would have exited on the
1824 // exiting iteration, but (from the visit order) strictly follows another
1825 // which does the same and is thus dead.
1826 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1827 foldExit(L, ExitingBB, false, DeadInsts);
1828 Changed = true;
1829 continue;
1830 }
1831
1832 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1833 // here. If we kept track of the min of dominanting exits so far, we could
1834 // discharge exits with EC >= MDEC. This is less powerful than the existing
1835 // transform (since later exits aren't considered), but potentially more
1836 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1837 // or a >u b. Such a case is not currently known.
1838 }
1839 return Changed;
1840}
1841
1842bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1843 SmallVector<BasicBlock*, 16> ExitingBlocks;
1844 L->getExitingBlocks(ExitingBlocks);
1845
1846 // Finally, see if we can rewrite our exit conditions into a loop invariant
1847 // form. If we have a read-only loop, and we can tell that we must exit down
1848 // a path which does not need any of the values computed within the loop, we
1849 // can rewrite the loop to exit on the first iteration. Note that this
1850 // doesn't either a) tell us the loop exits on the first iteration (unless
1851 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1852 // This transformation looks a lot like a restricted form of dead loop
1853 // elimination, but restricted to read-only loops and without neccesssarily
1854 // needing to kill the loop entirely.
1855 if (!LoopPredication)
1856 return false;
1857
1858 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1859 // through *explicit* control flow. We have to eliminate the possibility of
1860 // implicit exits (see below) before we know it's truly exact.
1861 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1862 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1863 return false;
1864
1865 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1866 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1867
1868 auto BadExit = [&](BasicBlock *ExitingBB) {
1869 // If our exiting block exits multiple loops, we can only rewrite the
1870 // innermost one. Otherwise, we're changing how many times the innermost
1871 // loop runs before it exits.
1872 if (LI->getLoopFor(ExitingBB) != L)
1873 return true;
1874
1875 // Can't rewrite non-branch yet.
1876 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1877 if (!BI)
1878 return true;
1879
1880 // If already constant, nothing to do.
1881 if (isa<Constant>(BI->getCondition()))
1882 return true;
1883
1884 // If the exit block has phis, we need to be able to compute the values
1885 // within the loop which contains them. This assumes trivially lcssa phis
1886 // have already been removed; TODO: generalize
1887 BasicBlock *ExitBlock =
1888 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1889 if (!ExitBlock->phis().empty())
1890 return true;
1891
1892 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1893 if (isa<SCEVCouldNotCompute>(ExitCount) ||
1894 !Rewriter.isSafeToExpand(ExitCount))
1895 return true;
1896
1897 assert(SE->isLoopInvariant(ExitCount, L) &&
1898 "Exit count must be loop invariant");
1899 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1900 return false;
1901 };
1902
1903 // If we have any exits which can't be predicated themselves, than we can't
1904 // predicate any exit which isn't guaranteed to execute before it. Consider
1905 // two exits (a) and (b) which would both exit on the same iteration. If we
1906 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1907 // we could convert a loop from exiting through (a) to one exiting through
1908 // (b). Note that this problem exists only for exits with the same exit
1909 // count, and we could be more aggressive when exit counts are known inequal.
1910 llvm::sort(ExitingBlocks,
1911 [&](BasicBlock *A, BasicBlock *B) {
1912 // std::sort sorts in ascending order, so we want the inverse of
1913 // the normal dominance relation, plus a tie breaker for blocks
1914 // unordered by dominance.
1915 if (DT->properlyDominates(A, B)) return true;
1916 if (DT->properlyDominates(B, A)) return false;
1917 return A->getName() < B->getName();
1918 });
1919 // Check to see if our exit blocks are a total order (i.e. a linear chain of
1920 // exits before the backedge). If they aren't, reasoning about reachability
1921 // is complicated and we choose not to for now.
1922 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1923 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1924 return false;
1925
1926 // Given our sorted total order, we know that exit[j] must be evaluated
1927 // after all exit[i] such j > i.
1928 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1929 if (BadExit(ExitingBlocks[i])) {
1930 ExitingBlocks.resize(i);
1931 break;
1932 }
1933
1934 if (ExitingBlocks.empty())
1935 return false;
1936
1937 // We rely on not being able to reach an exiting block on a later iteration
1938 // then it's statically compute exit count. The implementaton of
1939 // getExitCount currently has this invariant, but assert it here so that
1940 // breakage is obvious if this ever changes..
1941 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1942 return DT->dominates(ExitingBB, L->getLoopLatch());
1943 }));
1944
1945 // At this point, ExitingBlocks consists of only those blocks which are
1946 // predicatable. Given that, we know we have at least one exit we can
1947 // predicate if the loop is doesn't have side effects and doesn't have any
1948 // implicit exits (because then our exact BTC isn't actually exact).
1949 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1950 // suggestions on how to improve this? I can obviously bail out for outer
1951 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1952 // is that enough for *all* side effects?
1953 for (BasicBlock *BB : L->blocks())
1954 for (auto &I : *BB)
1955 // TODO:isGuaranteedToTransfer
1956 if (I.mayHaveSideEffects())
1957 return false;
1958
1959 bool Changed = false;
1960 // Finally, do the actual predication for all predicatable blocks. A couple
1961 // of notes here:
1962 // 1) We don't bother to constant fold dominated exits with identical exit
1963 // counts; that's simply a form of CSE/equality propagation and we leave
1964 // it for dedicated passes.
1965 // 2) We insert the comparison at the branch. Hoisting introduces additional
1966 // legality constraints and we leave that to dedicated logic. We want to
1967 // predicate even if we can't insert a loop invariant expression as
1968 // peeling or unrolling will likely reduce the cost of the otherwise loop
1969 // varying check.
1970 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1972 Value *ExactBTCV = nullptr; // Lazily generated if needed.
1973 for (BasicBlock *ExitingBB : ExitingBlocks) {
1974 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1975
1976 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1977 Value *NewCond;
1978 if (ExitCount == ExactBTC) {
1979 NewCond = L->contains(BI->getSuccessor(0)) ?
1980 B.getFalse() : B.getTrue();
1981 } else {
1982 Value *ECV = Rewriter.expandCodeFor(ExitCount);
1983 if (!ExactBTCV)
1984 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1985 Value *RHS = ExactBTCV;
1986 if (ECV->getType() != RHS->getType()) {
1987 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1988 ECV = B.CreateZExt(ECV, WiderTy);
1989 RHS = B.CreateZExt(RHS, WiderTy);
1990 }
1991 auto Pred = L->contains(BI->getSuccessor(0)) ?
1992 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1993 NewCond = B.CreateICmp(Pred, ECV, RHS);
1994 }
1995 Value *OldCond = BI->getCondition();
1996 BI->setCondition(NewCond);
1997 if (OldCond->use_empty())
1998 DeadInsts.emplace_back(OldCond);
1999 Changed = true;
2000 }
2001
2002 return Changed;
2003}
2004
2005//===----------------------------------------------------------------------===//
2006// IndVarSimplify driver. Manage several subpasses of IV simplification.
2007//===----------------------------------------------------------------------===//
2008
/// Top-level driver for induction variable simplification on loop \p L.
/// Runs each subpass in a fixed order (FP IV rewrite, simplify/extend,
/// exit-value rewriting, congruent-IV elimination, exit canonicalization,
/// exit optimization/predication, LFTR, then cleanup) and returns true if
/// any of them changed the IR.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  // Note: Since the result returned by ScalarEvolution may depend on the order
  // in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop.  Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other change have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    // Compare old and new counts at the narrower of the two widths; a
    // truncation here is a no-op by construction.
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}
2208
2211 LPMUpdater &) {
2212 Function *F = L.getHeader()->getParent();
2213 const DataLayout &DL = F->getParent()->getDataLayout();
2214
2215 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2216 WidenIndVars && AllowIVWidening);
2217 if (!IVS.run(&L))
2218 return PreservedAnalyses::all();
2219
2220 auto PA = getLoopPassPreservedAnalyses();
2221 PA.preserveSet<CFGAnalyses>();
2222 if (AR.MSSA)
2223 PA.preserve<MemorySSAAnalysis>();
2224 return PA;
2225}
2226
2227namespace {
2228
2229struct IndVarSimplifyLegacyPass : public LoopPass {
2230 static char ID; // Pass identification, replacement for typeid
2231
2232 IndVarSimplifyLegacyPass() : LoopPass(ID) {
2234 }
2235
2236 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2237 if (skipLoop(L))
2238 return false;
2239
2240 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2241 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2242 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2243 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2244 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2245 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2246 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2247 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2248 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2249 MemorySSA *MSSA = nullptr;
2250 if (MSSAAnalysis)
2251 MSSA = &MSSAAnalysis->getMSSA();
2252
2253 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2254 return IVS.run(L);
2255 }
2256
2257 void getAnalysisUsage(AnalysisUsage &AU) const override {
2258 AU.setPreservesCFG();
2261 }
2262};
2263
2264} // end anonymous namespace
2265
2266char IndVarSimplifyLegacyPass::ID = 0;
2267
2268INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2269 "Induction Variable Simplification", false, false)
2271INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2272 "Induction Variable Simplification", false, false)
2273
2275 return new IndVarSimplifyLegacyPass();
2276}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
assume Assume Builder
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:678
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
static void replaceExitCond(BranchInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars. Has no " "effect in release builds. (Note: this adds additional SCEV " "queries potentially changing the analysis result)"))
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
static void replaceLoopPHINodesWithPreheaderValues(LoopInfo *LI, Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts, ScalarEvolution &SE)
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static Value * createInvariantCond(const Loop *L, BasicBlock *ExitingBB, const ScalarEvolution::LoopInvariantPredicate &LIP, SCEVExpander &Rewriter)
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
static std::optional< Value * > createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter)
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
Induction Variable Simplification
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
static Constant * createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, bool IsTaken)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
#define DEBUG_TYPE
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR.
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
IntegerType * Int32Ty
#define P(N)
if(VerifyEach)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:85
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1120
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:372
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:245
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:146
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:428
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:675
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:718
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:721
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:747
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:748
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:724
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:733
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:722
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:723
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:745
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:732
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:726
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:729
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:743
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:730
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:725
@ ICMP_EQ
equal
Definition: InstrTypes.h:739
@ ICMP_NE
not equal
Definition: InstrTypes.h:740
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:746
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:734
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:731
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:832
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:808
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:256
const APFloat & getValueAPF() const
Definition: Constants.h:297
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:887
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:114
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:267
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction compares its operands according to the predicate given to the constructor.
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2550
Interface for visiting interesting IV users that are recognized but not simplified by this utility.
virtual void visitCast(CastInst *Cast)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:70
const BasicBlock * getParent() const
Definition: Instruction.h:90
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:168
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Class to represent integer types.
Definition: DerivedTypes.h:40
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:139
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
BlockT * getHeader() const
Definition: LoopInfo.h:105
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:107
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:188
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:142
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:1140
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
bool isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens=true) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:462
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:479
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI, bool IgnoreTokens=true) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:469
bool makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt=nullptr, MemorySSAUpdater *MSSAU=nullptr, ScalarEvolution *SE=nullptr) const
If the given value is an instruction inside of the loop and it can be hoisted, do so to make it trivi...
Definition: LoopInfo.cpp:70
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:936
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:986
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1759
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
Type * getWiderType(Type *Ty1, Type *Ty2) const
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
std::optional< bool > evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Check whether the condition described by Pred, LHS, and RHS is true or false in the given Context.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsExit, bool AllowPredicates=false)
Compute the number of times the backedge of the specified loop will execute if its exit condition wer...
std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
size_type size() const
Definition: SmallPtrSet.h:93
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:177
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:249
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:222
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:381
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
self_iterator getIterator()
Definition: ilist_node.h:82
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:975
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:703
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1735
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:537
@ Done
Definition: Threading.h:61
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1683
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
bool mustTriggerUB(const Instruction *I, const SmallSet< const Value *, 16 > &KnownPoison)
Return true if the given instruction must trigger undefined behavior when I is executed with any oper...
TargetTransformInfo TTI
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:89
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1998
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1259
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:645
@ UnusedIndVarInLoop
Definition: LoopUtils.h:441
@ OnlyCheapRepl
Definition: LoopUtils.h:439
@ NeverRepl
Definition: LoopUtils.h:438
@ NoHardUse
Definition: LoopUtils.h:440
@ AlwaysRepl
Definition: LoopUtils.h:442
Pass * createIndVarSimplifyPass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Collect information about induction variables that are used by sign/zero extend operations.
PHINode * NarrowIV