//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//
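//
// As an illustration of change (2) above (a sketch with hypothetical names,
// not taken from this file):
//
//   for (i = 0; i != n; ++i) {}
//   use(i);
//
// becomes
//
//   for (i = 0; i != n; ++i) {}
//   use(n);
//
// after which the loop computes nothing observable and can be deleted.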

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <memory>
#include <optional>
#include <utility>

using namespace llvm;
using namespace PatternMatch;
using namespace SCEVPatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");
93 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
94 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
96 clEnumValN(NeverRepl, "never", "never replace exit value"),
98 "only replace exit value when the cost is cheap"),
100 UnusedIndVarInLoop, "unusedindvarinloop",
101 "only replace exit value when it is an unused "
102 "induction variable in the loop and has cheap replacement cost"),
103 clEnumValN(NoHardUse, "noharduse",
104 "only replace exit values when loop def likely dead"),
105 clEnumValN(AlwaysRepl, "always",
106 "always replace exit value whenever possible")));
107
109 "indvars-post-increment-ranges", cl::Hidden,
110 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
111 cl::init(true));
112
113static cl::opt<bool>
114DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
115 cl::desc("Disable Linear Function Test Replace optimization"));
116
117static cl::opt<bool>
118LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
119 cl::desc("Predicate conditions in read only loops"));
120
122 "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
123 cl::desc("Predicate conditions that trap in loops with only local writes"));
124
125static cl::opt<bool>
126AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
127 cl::desc("Allow widening of indvars to eliminate s/zext"));
128
namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool RunUnswitching = false;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);

  bool runUnswitching() const { return RunUnswitching; }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}
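
// For instance (illustrative values, not from the original source):
// ConvertToSInt(APFloat(-3.0), IntVal) succeeds with IntVal == -3, while
// ConvertToSInt(APFloat(3.5), IntVal) fails because the conversion toward
// zero is inexact.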

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add, or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }
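
  // A worked example of the equality-comparison check above (illustrative
  // numbers, not from the original source): with InitValue = 0, IncValue = 3,
  // and ExitValue = 10 under ICMP_NE, Range = 10 and Leftover = 10 % 3 = 1,
  // so the integer IV would step 0, 3, 6, 9, 12, ... and never equal 10; the
  // transform correctly bails out. With ExitValue = 9, Leftover == 0 and the
  // rewrite is safe.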

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI =
      PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
  NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));
  NewPHI->setDebugLoc(PN->getDebugLoc());

  Instruction *NewAdd = BinaryOperator::CreateAdd(
      NewPHI, ConstantInt::getSigned(Int32Ty, IncValue),
      Incr->getName() + ".int", Incr->getIterator());
  NewAdd->setDebugLoc(Incr->getDebugLoc());
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(
      TheBr->getIterator(), NewPred, NewAdd,
      ConstantInt::getSigned(Int32Ty, ExitValue), Compare->getName());
  NewCompare->setDebugLoc(Compare->getDebugLoc());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something
  // else in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                       PN->getParent()->getFirstInsertionPt());
    Conv->setDebugLoc(PN->getDebugLoc());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs(llvm::make_pointer_range(Header->phis()));

  bool Changed = false;
  for (WeakTrackingVH &PHI : PHIs)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
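///
/// As a sketch (hypothetical IR, not from the original source): if a header
/// PHI is %p = phi [ %init, %preheader ], [ %next, %latch ] and an exit is
/// guarded by a loop-invariant condition whose block dominates the latch,
/// then an LCSSA PHI in the exit block that receives %p can instead receive
/// %init, since that exit can only be taken on the first iteration.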
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute the cost of all instructions on the induction variable
  // more comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo> WideIVs;

  auto *GuardDecl = Intrinsic::getDeclarationIfExists(
      L->getBlocks()[0]->getModule(), Intrinsic::experimental_guard);
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis(
      llvm::make_pointer_range(L->getHeader()->phis()));

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
                                             Rewriter, &Visitor);

      Changed |= C;
      RunUnswitching |= U;
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
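/// (The canonical form LFTR produces is an eq/ne comparison of a simple
/// counter against a loop-invariant limit, e.g. `icmp ne %iv.next, %limit`;
/// the names here are illustrative, not taken from the original source.)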
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value *> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value *, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode *Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEV *S = SE->getSCEV(Phi);
  if (!match(S, m_scev_AffineAddRec(m_SCEV(), m_scev_One(), m_SpecificLoop(L))))
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // For integer IVs, truncate the IV before computing the limit unless we
  // know a priori that the limit must be a constant when evaluated in the
  // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
  // IV in the loop over a (potentially) expensive expansion of the widened
  // exit count add(zext(add)) expression.
  if (IndVar->getType()->isIntegerTy() &&
      SE->getTypeSizeInBits(AR->getType()) >
          SE->getTypeSizeInBits(ExitCount->getType())) {
    const SCEV *IVInit = AR->getStart();
    if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
      AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
  }

  const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
  const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
  assert(SE->isLoopInvariant(IVLimit, L) &&
         "Computed iteration count is not loop invariant!");
  return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
                                ExitingBB->getTerminator());
}
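
// A quick worked example of the computation above (illustrative numbers, not
// from the original source): for the pre-increment addrec {7,+,1} and an
// ExitCount of 18, evaluateAtIteration yields 7 + 18*1 = 25; the
// post-increment addrec {8,+,1} evaluated at the same count yields 26, which
// is the value the incremented IV holds when the exit is taken from the latch.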

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
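///
/// As a sketch (hypothetical IR, not from the original source), an exit test
/// such as `%cmp = icmp slt i32 %iv, %n` is replaced with
/// `%exitcond = icmp ne i32 %iv.next, %limit`, where %limit is the value
/// computed by genLoopLimit from the SCEV exit count.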
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
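///
/// For example (hypothetical code, not from the original source): a pure
/// computation such as `%sum = add i64 %a, %b` placed in the preheader but
/// only used by instructions in the exit block can be moved into the exit
/// block, so its result is no longer live across every loop iteration.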
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {

    // Skip BB Terminator.
    if (Preheader->getTerminator() == &I)
      continue;

    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I.mayHaveSideEffects() || I.mayReadFromMemory())
      continue;

    // Skip debug or pseudo instructions.
    if (I.isDebugOrPseudoInst())
      continue;

    // Skip eh pad instructions.
    if (I.isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(&I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I.uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    I.moveBefore(ExitBlock->getFirstInsertionPt());
    SE->forgetValue(&I);
    MadeAnyChanges = true;
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
                    << " with " << *NewCond << "\n");
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
                                      bool IsTaken) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  return ConstantInt::get(OldCond->getType(),
                          IsTaken ? ExitIfTrue : !ExitIfTrue);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
    ScalarEvolution &SE) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  SmallVector<Instruction *> Worklist;
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    for (User *U : PN.users())
      Worklist.push_back(cast<Instruction>(U));
    SE.forgetValue(&PN);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }

  // Replacing with the preheader value will often allow IV users to simplify
  // (especially if the preheader value is a constant).
  SmallPtrSet<Instruction *, 16> Visited;
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}

static Value *
createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
                    const ScalarEvolution::LoopInvariantPredicate &LIP,
                    SCEVExpander &Rewriter) {
  ICmpInst::Predicate InvariantPred = LIP.Pred;
  BasicBlock *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader doesn't exist");
  Rewriter.setInsertPoint(Preheader->getTerminator());
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(Preheader->getTerminator());
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                            BI->getCondition()->getName());
}

static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  CmpPredicate Pred = ICmp->getCmpPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = ICmpInst::getInverseCmpPredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (const SCEV *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For a branch that stays in the loop by TRUE condition, go through AND. For
  // a branch that stays in the loop by FALSE condition, go through OR. Both
  // give similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exact
  // same exit count. For all other icmp's, we could use one less iteration,
  // because their value on the last iteration doesn't really matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      const SCEV *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on last iter, so this one
    //   doesn't really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on last
      // iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}
1405
1406bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1407 // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1408 // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1409 // never reaches the icmp since the zext doesn't fold to an AddRec unless
1410 // it already has flags. The alternative to this would be to extending the
1411 // set of "interesting" IV users to include the icmp, but doing that
1412 // regresses results in practice by querying SCEVs before trip counts which
1413 // rely on them which results in SCEV caching sub-optimal answers. The
1414 // concern about caching sub-optimal results is why we only query SCEVs of
1415 // the loop invariant RHS here.
1416 SmallVector<BasicBlock*, 16> ExitingBlocks;
1417 L->getExitingBlocks(ExitingBlocks);
1418 bool Changed = false;
1419 for (auto *ExitingBB : ExitingBlocks) {
1420 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1421 if (!BI)
1422 continue;
1423 assert(BI->isConditional() && "exit branch must be conditional");
1424
1425 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1426 if (!ICmp || !ICmp->hasOneUse())
1427 continue;
1428
1429 auto *LHS = ICmp->getOperand(0);
1430 auto *RHS = ICmp->getOperand(1);
1431 // For the range reasoning, avoid computing SCEVs in the loop to avoid
1432 // poisoning cache with sub-optimal results. For the must-execute case,
1433 // this is a neccessary precondition for correctness.
1434 if (!L->isLoopInvariant(RHS)) {
1435 if (!L->isLoopInvariant(LHS))
1436 continue;
1437 // Same logic applies for the inverse case
1438 std::swap(LHS, RHS);
1439 }
1440
1441 // Match (icmp signed-cond zext, RHS)
1442 Value *LHSOp = nullptr;
1443 if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1444 continue;
1445
1446 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1447 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1448 auto FullCR = ConstantRange::getFull(InnerBitWidth);
1449 FullCR = FullCR.zeroExtend(OuterBitWidth);
1450 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1451 if (FullCR.contains(RHSCR)) {
1452 // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1453 // replace the signed condition with the unsigned version.
1454 ICmp->setPredicate(ICmp->getUnsignedPredicate());
1455 Changed = true;
1456 // Note: No SCEV invalidation needed. We've changed the predicate, but
1457 // have not changed exit counts, or the values produced by the compare.
1458 continue;
1459 }
1460 }
1461
1462 // Now that we've canonicalized the condition to match the extend,
1463 // see if we can rotate the extend out of the loop.
1464 for (auto *ExitingBB : ExitingBlocks) {
1465 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1466 if (!BI)
1467 continue;
1468 assert(BI->isConditional() && "exit branch must be conditional");
1469
1470 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1471 if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1472 continue;
1473
1474 bool Swapped = false;
1475 auto *LHS = ICmp->getOperand(0);
1476 auto *RHS = ICmp->getOperand(1);
1477 if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1478 // Nothing to rotate
1479 continue;
1480 if (L->isLoopInvariant(LHS)) {
1481 // Same logic applies for the inverse case until we actually pick
1482 // which operand of the compare to update.
1483 Swapped = true;
1484 std::swap(LHS, RHS);
1485 }
1486 assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1487
1488 // Match (icmp unsigned-cond zext, RHS)
1489 // TODO: Extend to handle corresponding sext/signed-cmp case
1490 // TODO: Extend to other invertible functions
1491 Value *LHSOp = nullptr;
1492 if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1493 continue;
1494
1495 // In general, we only rotate if we can do so without increasing the number
1496 // of instructions. The exception is when we have a zext(add-rec). The
1497 // reason for allowing this exception is that we know we need to get rid
1498 // of the zext for SCEV to be able to compute a trip count for said loops;
1499 // we consider the new trip count valuable enough to increase instruction
1500 // count by one.
1501 if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1502 continue;
1503
1504 // Given an icmp unsigned-cond zext(Op), RHS where zext(trunc(RHS)) == RHS,
1505 // replace it with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1506 // when the zext is loop varying and RHS is loop invariant. This converts
1507 // loop-varying work to loop-invariant work.
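// A sketch of the rotation, with hypothetical IR names:
//   loop:
//     %wide = zext i8 %iv to i32
//     %cmp  = icmp ult i32 %wide, %n   ; %n invariant, range within [0, 256)
// becomes
//   preheader:
//     %n.trunc = trunc i32 %n to i8
//   loop:
//     %cmp  = icmp ult i8 %iv, %n.trunc
// The extension leaves the loop, and SCEV can see the narrow AddRec directly.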
1508 auto doRotateTransform = [&]() {
1509 assert(ICmp->isUnsigned() && "must have proven unsigned already");
1510 auto *NewRHS = CastInst::Create(
1511 Instruction::Trunc, RHS, LHSOp->getType(), "",
1512 L->getLoopPreheader()->getTerminator()->getIterator());
1513 // NewRHS is an operation that has been hoisted out of the loop, and
1514 // therefore should have a dropped location.
1515 NewRHS->setDebugLoc(DebugLoc::getDropped());
1516 ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1517 ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1518 // Samesign flag cannot be preserved after narrowing the compare.
1519 ICmp->setSameSign(false);
1520 if (LHS->use_empty())
1521 DeadInsts.push_back(LHS);
1522 };
1523
1524 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1525 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1526 auto FullCR = ConstantRange::getFull(InnerBitWidth);
1527 FullCR = FullCR.zeroExtend(OuterBitWidth);
1528 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1529 if (FullCR.contains(RHSCR)) {
1530 doRotateTransform();
1531 Changed = true;
1532 // Note: we are leaving SCEV in an unfortunately imprecise state here,
1533 // as rotation tends to reveal information about trip counts that was
1534 // not previously visible.
1535 continue;
1536 }
1537 }
1538
1539 return Changed;
1540}
1541
1542bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1543 SmallVector<BasicBlock*, 16> ExitingBlocks;
1544 L->getExitingBlocks(ExitingBlocks);
1545
1546 // Remove all exits which aren't both rewriteable and guaranteed to
1547 // execute on every iteration.
1548 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1549 // If our exiting block exits multiple loops, we can only rewrite the
1550 // innermost one. Otherwise, we're changing how many times the innermost
1551 // loop runs before it exits.
1552 if (LI->getLoopFor(ExitingBB) != L)
1553 return true;
1554
1555 // Can't rewrite non-branch yet.
1556 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1557 if (!BI)
1558 return true;
1559
1560 // Likewise, the loop latch must be dominated by the exiting BB.
1561 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1562 return true;
1563
1564 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1565 // If already constant, nothing to do. However, if this is an
1566 // unconditional exit, we can still replace header phis with their
1567 // preheader value.
1568 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1569 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1570 return true;
1571 }
1572
1573 return false;
1574 });
1575
1576 if (ExitingBlocks.empty())
1577 return false;
1578
1579 // Get a symbolic upper bound on the loop backedge taken count.
1580 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1581 if (isa<SCEVCouldNotCompute>(MaxBECount))
1582 return false;
1583
1584 // Visit our exit blocks in order of dominance. Because all the exits
1585 // must dominate the latch, there is a total dominance order
1586 // between them.
1587 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1588 // llvm::sort sorts in ascending order, so we want the inverse of
1589 // the normal dominance relation.
1590 if (A == B) return false;
1591 if (DT->properlyDominates(A, B))
1592 return true;
1593 else {
1594 assert(DT->properlyDominates(B, A) &&
1595 "expected total dominance order!");
1596 return false;
1597 }
1598 });
1599#ifndef NDEBUG
1600 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1601 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1602 }
1603#endif
1604
1605 bool Changed = false;
1606 bool SkipLastIter = false;
1607 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1608 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1609 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1610 return;
1611 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1612 CurrMaxExit = MaxExitCount;
1613 else
1614 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1615 // Once the combined exit count reaches MaxBECount, some earlier exit
1616 // must fire by the last iteration, so later checks run 1 iteration less.
1617 if (CurrMaxExit == MaxBECount)
1618 SkipLastIter = true;
1619 };
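// A concrete instance (hypothetical counts): if MaxBECount is %n and the
// first exit visited already has exact exit count %n, then CurrMaxExit ==
// MaxBECount immediately, and every later exit (which the first dominates)
// is analyzed with SkipLastIter set: it can never be reached on the final
// iteration.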
1620 SmallPtrSet<const SCEV *, 8> DominatingExactExitCounts;
1621 for (BasicBlock *ExitingBB : ExitingBlocks) {
1622 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1623 const SCEV *MaxExitCount = SE->getExitCount(
1624 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1625 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1626 // Okay, we do not know the exit count here. Can we at least prove that it
1627 // will remain the same within the iteration space?
1628 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1629 auto OptimizeCond = [&](bool SkipLastIter) {
1630 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1631 MaxBECount, SkipLastIter,
1632 SE, Rewriter, DeadInsts);
1633 };
1634
1635 // TODO: We might have proved that we can skip the last iteration for
1636 // this check. In this case, we only want to check the condition on the
1637 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1638 // corner case:
1639 //
1640 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1641 //
1642 // If we could not prove that len != 0, then we also could not prove that
1643 // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
1644 // OptimizeCond will likely not prove anything for it, even if it could
1645 // prove the same fact for len.
1646 //
1647 // As a temporary solution, we query both last and pre-last iterations in
1648 // the hope that we will be able to prove triviality for at least one of
1649 // them. We can stop querying MaxBECount for this case once SCEV
1650 // understands that (MaxBECount - 1) will not overflow here.
1651 if (OptimizeCond(false))
1652 Changed = true;
1653 else if (SkipLastIter && OptimizeCond(true))
1654 Changed = true;
1655 UpdateSkipLastIter(MaxExitCount);
1656 continue;
1657 }
1658
1659 UpdateSkipLastIter(ExactExitCount);
1660
1661 // If we know we'd exit on the first iteration, rewrite the exit to
1662 // reflect this. This does not imply the loop must exit through this
1663 // exit; there may be an earlier one taken on the first iteration.
1664 // We know that the backedge can't be taken, so we replace all
1665 // the header PHIs with values coming from the preheader.
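// For instance (a sketch): an exit testing "%iv == 0" where %iv starts at 0
// has exact exit count zero. The exit is taken the first time it is reached,
// the backedge can never execute, and each header phi collapses to its
// preheader input.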
1666 if (ExactExitCount->isZero()) {
1667 foldExit(L, ExitingBB, true, DeadInsts);
1668 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1669 Changed = true;
1670 continue;
1671 }
1672
1673 assert(ExactExitCount->getType()->isIntegerTy() &&
1674 MaxBECount->getType()->isIntegerTy() &&
1675 "Exit counts must be integers");
1676
1677 Type *WiderType =
1678 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1679 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1680 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1681 assert(MaxBECount->getType() == ExactExitCount->getType());
1682
1683 // Can we prove that some other exit must be taken strictly before this
1684 // one?
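// E.g. (a sketch): if entry guards prove MaxBECount u< this exit's exact
// count (say 9 u< 20), some other exit must terminate the loop strictly
// earlier; this exit is never taken and its branch folds to the in-loop
// successor.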
1685 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1686 ExactExitCount)) {
1687 foldExit(L, ExitingBB, false, DeadInsts);
1688 Changed = true;
1689 continue;
1690 }
1691
1692 // As we run, keep track of which exit counts we've encountered. If we
1693 // find a duplicate, we've found an exit which would be taken on the
1694 // same iteration, but (from the visit order) strictly follows another
1695 // exit with the same count, and is thus dead.
1696 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1697 foldExit(L, ExitingBB, false, DeadInsts);
1698 Changed = true;
1699 continue;
1700 }
1701
1702 // TODO: There might be another opportunity to leverage SCEV's reasoning
1703 // here. If we kept track of the min of dominating exit counts so far
1704 // (MDEC), we could discharge exits with EC >= MDEC. This is less powerful
1705 // than the existing transform (since later exits aren't considered), but
1706 // potentially more powerful for any case where SCEV can prove a >=u b,
1707 // but neither a == b nor a >u b. Such a case is not currently known.
1708 }
1709 return Changed;
1710}
1711
1712static bool crashingBBWithoutEffect(const BasicBlock &BB) {
1713 return llvm::all_of(BB, [](const Instruction &I) {
1714 // TODO: for now this is overly restrictive, to make sure nothing in this
1715 // BB can depend on the loop body.
1716 // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
1717 // load does not have a side effect, but we could have
1718 // %a = load ptr, ptr %ptr
1719 // %b = load i32, ptr %a
1720 // Now if the loop stored a non-null pointer through %ptr (making %a
1721 // non-null), skipping loop iterations could introduce a null dereference.
1722 if (const auto *CB = dyn_cast<CallBase>(&I)) {
1723 if (CB->onlyAccessesInaccessibleMemory())
1724 return true;
1725 }
1726 return isa<UnreachableInst>(I);
1727 });
1728}
1729
1730bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1731 SmallVector<BasicBlock*, 16> ExitingBlocks;
1732 L->getExitingBlocks(ExitingBlocks);
1733
1734 // See if we can rewrite our exit conditions into a loop-invariant form.
1735 // If we have a read-only loop, and we can tell that we must exit down
1736 // a path which does not need any of the values computed within the loop, we
1737 // can rewrite the loop to exit on the first iteration. Note that this
1738 // doesn't tell us either a) that the loop exits on the first iteration
1739 // (unless *all* exits are predicateable) or b) *which* exit might be taken.
1740 // This transformation looks a lot like a restricted form of dead loop
1741 // elimination, limited to read-only loops and without necessarily
1742 // needing to kill the loop entirely.
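// A sketch of the kind of loop this targets (hypothetical source; no
// loop-defined value is used on any exit path):
//   for (i = 0; i != n; ++i)
//     if (a[i] == key) break;
// Each predicatable exit test gets replaced by a loop-invariant comparison
// of that exit's count against the exact backedge-taken count, which later
// passes (unswitching, peeling, unrolling) can fold.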
1743 if (!LoopPredication)
1744 return false;
1745
1746 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1747 // through *explicit* control flow. We have to eliminate the possibility of
1748 // implicit exits (see below) before we know it's truly exact.
1749 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1750 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1751 return false;
1752
1753 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1754 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1755
1756 auto BadExit = [&](BasicBlock *ExitingBB) {
1757 // If our exiting block exits multiple loops, we can only rewrite the
1758 // innermost one. Otherwise, we're changing how many times the innermost
1759 // loop runs before it exits.
1760 if (LI->getLoopFor(ExitingBB) != L)
1761 return true;
1762
1763 // Can't rewrite non-branch yet.
1764 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1765 if (!BI)
1766 return true;
1767
1768 // If already constant, nothing to do.
1769 if (isa<Constant>(BI->getCondition()))
1770 return true;
1771
1772 // If the exit block has phis, we would need to compute the values,
1773 // defined within the loop, which feed them. This assumes trivial LCSSA
1774 // phis have already been removed; TODO: generalize.
1775 BasicBlock *ExitBlock =
1776 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1777 if (!ExitBlock->phis().empty())
1778 return true;
1779
1780 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1781 if (isa<SCEVCouldNotCompute>(ExitCount) ||
1782 !Rewriter.isSafeToExpand(ExitCount))
1783 return true;
1784
1785 assert(SE->isLoopInvariant(ExitCount, L) &&
1786 "Exit count must be loop invariant");
1787 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1788 return false;
1789 };
1790
1791 // Make sure all exits dominate the latch. This means there is a linear chain
1792 // of exits. We check this before sorting so we have a total order.
1793 BasicBlock *Latch = L->getLoopLatch();
1794 for (BasicBlock *ExitingBB : ExitingBlocks)
1795 if (!DT->dominates(ExitingBB, Latch))
1796 return false;
1797
1798 // If we have any exits which can't be predicated themselves, then we can't
1799 // predicate any exit which isn't guaranteed to execute before it. Consider
1800 // two exits (a) and (b) which would both exit on the same iteration. If we
1801 // can predicate (b), but not (a), and (a) precedes (b) along some path, then
1802 // we could convert a loop from exiting through (a) to one exiting through
1803 // (b). Note that this problem exists only for exits with the same exit
1804 // count, and we could be more aggressive when exit counts are known unequal.
1805 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1806 // llvm::sort sorts in ascending order, so we want the inverse of
1807 // the normal dominance relation.
1808 if (A == B)
1809 return false;
1810 if (DT->properlyDominates(A, B))
1811 return true;
1812 if (DT->properlyDominates(B, A))
1813 return false;
1814 llvm_unreachable("Should have total dominance order");
1815 });
1816
1817 // Make sure our exit blocks are really a total order (i.e. a linear chain of
1818 // exits before the backedge).
1819 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1820 assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
1821 "Not sorted by dominance");
1822
1823 // Given our sorted total order, we know that exit[j] must be evaluated
1824 // after all exit[i] such that i < j.
1825 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1826 if (BadExit(ExitingBlocks[i])) {
1827 ExitingBlocks.resize(i);
1828 break;
1829 }
1830
1831 if (ExitingBlocks.empty())
1832 return false;
1833
1834 // At this point, ExitingBlocks consists of only those blocks which are
1835 // predicatable. Given that, we know we have at least one exit we can
1836 // predicate if the loop doesn't have side effects and doesn't have any
1837 // implicit exits (because then our exact BTC isn't actually exact).
1838 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1839 // suggestions on how to improve this? I can obviously bail out for outer
1840 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1841 // is that enough for *all* side effects?
1842 bool HasThreadLocalSideEffects = false;
1843 for (BasicBlock *BB : L->blocks())
1844 for (auto &I : *BB)
1845 // TODO: isGuaranteedToTransfer
1846 if (I.mayHaveSideEffects()) {
1847 if (!LoopPredicationTraps)
1848 return false;
1849 HasThreadLocalSideEffects = true;
1850 if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
1851 // Simple stores cannot be observed by other threads.
1852 // If HasThreadLocalSideEffects is set, we check
1853 // crashingBBWithoutEffect to make sure that the crashing BB cannot
1854 // observe them either.
1855 if (!SI->isSimple())
1856 return false;
1857 } else {
1858 return false;
1859 }
1860 }
1861
1862 bool Changed = false;
1863 // Finally, do the actual predication for all predicatable blocks. A couple
1864 // of notes here:
1865 // 1) We don't bother to constant fold dominated exits with identical exit
1866 // counts; that's simply a form of CSE/equality propagation and we leave
1867 // it for dedicated passes.
1868 // 2) We insert the comparison at the branch. Hoisting introduces additional
1869 // legality constraints and we leave that to dedicated logic. We want to
1870 // predicate even if we can't insert a loop invariant expression as
1871 // peeling or unrolling will likely reduce the cost of the otherwise loop
1872 // varying check.
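// A sketch of why the rewrite below is sound: the new condition
// "ExitCount == ExactBTC" is loop invariant, so the exit whose count equals
// the BTC now exits the first time it is evaluated, while any exit whose
// count differs from the BTC at runtime is never taken. Because the loop is
// read-only (or its only side effects are simple stores unobservable past a
// trapping exit), collapsing the iteration count this way is unobservable.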
1873 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1874 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1875 Value *ExactBTCV = nullptr; // Lazily generated if needed.
1876 for (BasicBlock *ExitingBB : ExitingBlocks) {
1877 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1878
1879 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1880 if (HasThreadLocalSideEffects) {
1881 const BasicBlock *Unreachable = nullptr;
1882 for (const BasicBlock *Succ : BI->successors()) {
1883 if (isa<UnreachableInst>(Succ->getTerminator()))
1884 Unreachable = Succ;
1885 }
1886 // Exiting BBs which have one branch back into the loop and another one to
1887 // a trap can still be optimized, because local side effects cannot
1888 // be observed in the exit case (the trap). We could be smarter about
1889 // this, but for now let's pattern-match common cases that directly trap.
1890 if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
1891 return Changed;
1892 }
1893 Value *NewCond;
1894 if (ExitCount == ExactBTC) {
1895 NewCond = L->contains(BI->getSuccessor(0)) ?
1896 B.getFalse() : B.getTrue();
1897 } else {
1898 Value *ECV = Rewriter.expandCodeFor(ExitCount);
1899 if (!ExactBTCV)
1900 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1901 Value *RHS = ExactBTCV;
1902 if (ECV->getType() != RHS->getType()) {
1903 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1904 ECV = B.CreateZExt(ECV, WiderTy);
1905 RHS = B.CreateZExt(RHS, WiderTy);
1906 }
1907 auto Pred = L->contains(BI->getSuccessor(0)) ?
1908 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1909 NewCond = B.CreateICmp(Pred, ECV, RHS);
1910 }
1911 Value *OldCond = BI->getCondition();
1912 BI->setCondition(NewCond);
1913 if (OldCond->use_empty())
1914 DeadInsts.emplace_back(OldCond);
1915 Changed = true;
1916 RunUnswitching = true;
1917 }
1918
1919 return Changed;
1920}
1921
1922//===----------------------------------------------------------------------===//
1923// IndVarSimplify driver. Manage several subpasses of IV simplification.
1924//===----------------------------------------------------------------------===//
1925
1926bool IndVarSimplify::run(Loop *L) {
1927 // We need (and expect!) the incoming loop to be in LCSSA.
1928 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1929 "LCSSA required to run indvars!");
1930
1931 // If LoopSimplify form is not available, stay out of trouble. Some notes:
1932 // - LSR currently only supports LoopSimplify-form loops. Indvars'
1933 // canonicalization can be a pessimization without LSR to "clean up"
1934 // afterwards.
1935 // - We depend on having a preheader; in particular,
1936 // Loop::getCanonicalInductionVariable only supports loops with preheaders,
1937 // and we're in trouble if we can't find the induction variable even when
1938 // we've manually inserted one.
1939 // - LFTR relies on having a single backedge.
1940 if (!L->isLoopSimplifyForm())
1941 return false;
1942
1943 bool Changed = false;
1944 // If there are any floating-point recurrences, attempt to
1945 // transform them to use integer recurrences.
1946 Changed |= rewriteNonIntegerIVs(L);
1947
1948 // Create a rewriter object which we'll use to transform the code with.
1949 SCEVExpander Rewriter(*SE, DL, "indvars");
1950#if LLVM_ENABLE_ABI_BREAKING_CHECKS
1951 Rewriter.setDebugType(DEBUG_TYPE);
1952#endif
1953
1954 // Eliminate redundant IV users.
1955 //
1956 // Simplification works best when run before other consumers of SCEV. We
1957 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1958 // other expressions involving loop IVs have been evaluated. This helps SCEV
1959 // set no-wrap flags before normalizing sign/zero extension.
1960 Rewriter.disableCanonicalMode();
1961 Changed |= simplifyAndExtend(L, Rewriter, LI);
1962
1963 // Check to see if we can compute the final value of any expressions
1964 // that are recurrent in the loop, and substitute the exit values from the
1965 // loop into any instructions outside of the loop that use the final values
1966 // of the current expressions.
1967 if (ReplaceExitValue != NeverRepl) {
1968 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1969 ReplaceExitValue, DeadInsts)) {
1970 NumReplaced += Rewrites;
1971 Changed = true;
1972 }
1973 }
1974
1975 // Eliminate redundant IV cycles.
1976 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1977
1978 // Try to convert exit conditions to unsigned and rotate computation
1979 // out of the loop. Note: Handles invalidation internally if needed.
1980 Changed |= canonicalizeExitCondition(L);
1981
1982 // Try to eliminate loop exits based on analyzable exit counts.
1983 if (optimizeLoopExits(L, Rewriter)) {
1984 Changed = true;
1985 // Given we've changed exit counts, notify SCEV.
1986 // Some nested loops may share the same folded exit basic block,
1987 // so we need to notify the topmost loop.
1988 SE->forgetTopmostLoop(L);
1989 }
1990
1991 // Try to form loop invariant tests for loop exits by changing how many
1992 // iterations of the loop run when that is unobservable.
1993 if (predicateLoopExits(L, Rewriter)) {
1994 Changed = true;
1995 // Given we've changed exit counts, notify SCEV
1996 SE->forgetLoop(L);
1997 }
1998
1999 // If we have a trip count expression, rewrite the loop's exit condition
2000 // using it.
2001 if (!DisableLFTR) {
2002 BasicBlock *PreHeader = L->getLoopPreheader();
2003
2004 SmallVector<BasicBlock*, 16> ExitingBlocks;
2005 L->getExitingBlocks(ExitingBlocks);
2006 for (BasicBlock *ExitingBB : ExitingBlocks) {
2007 // Can't rewrite non-branch yet.
2008 if (!isa<BranchInst>(ExitingBB->getTerminator()))
2009 continue;
2010
2011 // If our exiting block exits multiple loops, we can only rewrite the
2012 // innermost one. Otherwise, we're changing how many times the innermost
2013 // loop runs before it exits.
2014 if (LI->getLoopFor(ExitingBB) != L)
2015 continue;
2016
2017 if (!needsLFTR(L, ExitingBB))
2018 continue;
2019
2020 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2021 if (isa<SCEVCouldNotCompute>(ExitCount))
2022 continue;
2023
2024 // This was handled above, but as we form SCEVs, we can sometimes refine
2025 // existing ones; this allows exit counts to be folded to zero which
2026 // weren't when optimizeLoopExits saw them. Arguably, we should iterate
2027 // until stable to handle cases like this better.
2028 if (ExitCount->isZero())
2029 continue;
2030
2031 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2032 if (!IndVar)
2033 continue;
2034
2035 // Avoid high cost expansions. Note: This heuristic is questionable in
2036 // that our definition of "high cost" is not exactly principled.
2037 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2038 TTI, PreHeader->getTerminator()))
2039 continue;
2040
2041 if (!Rewriter.isSafeToExpand(ExitCount))
2042 continue;
2043
2044 Changed |= linearFunctionTestReplace(L, ExitingBB,
2045 ExitCount, IndVar,
2046 Rewriter);
2047 }
2048 }
2049 // Clear the rewriter cache, because values that are in the rewriter's cache
2050 // can be deleted in the loop below, causing the AssertingVH in the cache to
2051 // trigger.
2052 Rewriter.clear();
2053
2054 // Now that we're done iterating through lists, clean up any instructions
2055 // which are now dead.
2056 while (!DeadInsts.empty()) {
2057 Value *V = DeadInsts.pop_back_val();
2058
2059 if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2060 Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2061 else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2062 Changed |=
2063 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2064 }
2065
2066 // The Rewriter may not be used from this point on.
2067
2068 // Loop-invariant instructions in the preheader that aren't used in the
2069 // loop may be sunk below the loop to reduce register pressure.
2070 Changed |= sinkUnusedInvariants(L);
2071
2072 // rewriteFirstIterationLoopExitValues does not rely on the computation of
2073 // trip count and therefore can further simplify exit values in addition to
2074 // rewriteLoopExitValues.
2075 Changed |= rewriteFirstIterationLoopExitValues(L);
2076
2077 // Clean up dead instructions.
2078 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2079
2080 // Check a post-condition.
2081 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2082 "Indvars did not preserve LCSSA!");
2083 if (VerifyMemorySSA && MSSAU)
2084 MSSAU->getMemorySSA()->verifyMemorySSA();
2085
2086 return Changed;
2087}
2088
2089PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2090 LoopStandardAnalysisResults &AR,
2091 LPMUpdater &) {
2092 Function *F = L.getHeader()->getParent();
2093 const DataLayout &DL = F->getDataLayout();
2094
2095 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2096 WidenIndVars && AllowIVWidening);
2097 if (!IVS.run(&L))
2098 return PreservedAnalyses::all();
2099
2100 auto PA = getLoopPassPreservedAnalyses();
2101 PA.preserveSet<CFGAnalyses>();
2102 if (IVS.runUnswitching()) {
2103 AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
2104 PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2105 }
2106
2107 if (AR.MSSA)
2108 PA.preserve<MemorySSAAnalysis>();
2109 return PA;
2110}