LLVM 22.0.0git
IndVarSimplify.cpp
Go to the documentation of this file.
1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This transformation analyzes and transforms the induction variables (and
10// computations derived from them) into simpler forms suitable for subsequent
11// analysis and transformation.
12//
13// If the trip count of a loop is computable, this pass also makes the following
14// changes:
15// 1. The exit condition for the loop is canonicalized to compare the
16// induction value against the exit value. This turns loops like:
17// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18// 2. Any use outside of the loop of an expression derived from the indvar
19// is changed to compute the derived value outside of the loop, eliminating
20// the dependence on the exit value of the induction variable. If the only
21// purpose of the loop is to compute the exit value of some derived
22// expression, this transformation will make the loop dead.
23//
24//===----------------------------------------------------------------------===//
25
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constant.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/Dominators.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/PassManager.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/Use.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/IR/ValueHandle.h"
67#include "llvm/Support/Debug.h"
76#include <cassert>
77#include <cstdint>
78#include <utility>
79
80using namespace llvm;
81using namespace PatternMatch;
82using namespace SCEVPatternMatch;
83
84#define DEBUG_TYPE "indvars"
85
86STATISTIC(NumWidened , "Number of indvars widened");
87STATISTIC(NumReplaced , "Number of exit values replaced");
88STATISTIC(NumLFTR , "Number of loop exit tests replaced");
89STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
90STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
91
93 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
94 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
96 clEnumValN(NeverRepl, "never", "never replace exit value"),
98 "only replace exit value when the cost is cheap"),
100 UnusedIndVarInLoop, "unusedindvarinloop",
101 "only replace exit value when it is an unused "
102 "induction variable in the loop and has cheap replacement cost"),
103 clEnumValN(NoHardUse, "noharduse",
104 "only replace exit values when loop def likely dead"),
105 clEnumValN(AlwaysRepl, "always",
106 "always replace exit value whenever possible")));
107
109 "indvars-post-increment-ranges", cl::Hidden,
110 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
111 cl::init(true));
112
113static cl::opt<bool>
114DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
115 cl::desc("Disable Linear Function Test Replace optimization"));
116
117static cl::opt<bool>
118LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
119 cl::desc("Predicate conditions in read only loops"));
120
122 "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
123 cl::desc("Predicate conditions that trap in loops with only local writes"));
124
125static cl::opt<bool>
126AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
127 cl::desc("Allow widening of indvars to eliminate s/zext"));
128
129namespace {
130
131class IndVarSimplify {
132 LoopInfo *LI;
133 ScalarEvolution *SE;
134 DominatorTree *DT;
135 const DataLayout &DL;
138 std::unique_ptr<MemorySSAUpdater> MSSAU;
139
141 bool WidenIndVars;
142
143 bool RunUnswitching = false;
144
145 bool handleFloatingPointIV(Loop *L, PHINode *PH);
146 bool rewriteNonIntegerIVs(Loop *L);
147
148 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
149 /// Try to improve our exit conditions by converting condition from signed
150 /// to unsigned or rotating computation out of the loop.
151 /// (See inline comment about why this is duplicated from simplifyAndExtend)
152 bool canonicalizeExitCondition(Loop *L);
153 /// Try to eliminate loop exits based on analyzeable exit counts
154 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
155 /// Try to form loop invariant tests for loop exits by changing how many
156 /// iterations of the loop run when that is unobservable.
157 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
158
159 bool rewriteFirstIterationLoopExitValues(Loop *L);
160
161 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
162 const SCEV *ExitCount,
163 PHINode *IndVar, SCEVExpander &Rewriter);
164
165public:
166 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
167 const DataLayout &DL, TargetLibraryInfo *TLI,
168 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
169 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
170 WidenIndVars(WidenIndVars) {
171 if (MSSA)
172 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
173 }
174
175 bool run(Loop *L);
176
177 bool runUnswitching() const { return RunUnswitching; }
178};
179
180} // end anonymous namespace
181
182//===----------------------------------------------------------------------===//
183// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
184//===----------------------------------------------------------------------===//
185
186/// Convert APF to an integer, if possible.
187static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
188 bool isExact = false;
189 // See if we can convert this to an int64_t
190 uint64_t UIntVal;
191 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
192 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
193 !isExact)
194 return false;
195 IntVal = UIntVal;
196 return true;
197}
198
199// Ensure we stay within the bounds of fp values that can be represented as
200// integers without gaps, which are 2^24 and 2^53 for IEEE-754 single and double
201// precision respectively (both on negative and positive side).
202static bool isRepresentableAsExactInteger(ConstantFP *FPVal, int64_t IntVal) {
203 const auto &InitValueFltSema = FPVal->getValueAPF().getSemantics();
204 if (!APFloat::isIEEELikeFP(InitValueFltSema))
205 return false;
206
207 return isUIntN(APFloat::semanticsPrecision(InitValueFltSema),
208 AbsoluteValue(IntVal));
209}
210
211/// If the loop has floating induction variable then insert corresponding
212/// integer induction variable if possible.
213/// For example,
214/// for(double i = 0; i < 10000; ++i)
215/// bar(i)
216/// is converted into
217/// for(int i = 0; i < 10000; ++i)
218/// bar((double)i);
219bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
220 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
221 unsigned BackEdge = IncomingEdge^1;
222
223 // Check incoming value.
224 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
225
226 int64_t InitValue;
227 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue) ||
228 !isRepresentableAsExactInteger(InitValueVal, InitValue))
229 return false;
230
231 // Check IV increment. Reject this PN if increment operation is not
232 // an add or increment value can not be represented by an integer.
233 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
234 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
235
236 // If this is not an add of the PHI with a constantfp, or if the constant fp
237 // is not an integer, bail out.
238 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
239 int64_t IncValue;
240 if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
241 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
242 return false;
243
244 // Check Incr uses. One user is PN and the other user is an exit condition
245 // used by the conditional terminator.
246 Value::user_iterator IncrUse = Incr->user_begin();
247 Instruction *U1 = cast<Instruction>(*IncrUse++);
248 if (IncrUse == Incr->user_end()) return false;
249 Instruction *U2 = cast<Instruction>(*IncrUse++);
250 if (IncrUse != Incr->user_end()) return false;
251
252 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
253 // only used by a branch, we can't transform it.
254 FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
255 if (!Compare)
257 if (!Compare || !Compare->hasOneUse() ||
258 !isa<BranchInst>(Compare->user_back()))
259 return false;
260
261 BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
262
263 // We need to verify that the branch actually controls the iteration count
264 // of the loop. If not, the new IV can overflow and no one will notice.
265 // The branch block must be in the loop and one of the successors must be out
266 // of the loop.
267 assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
268 if (!L->contains(TheBr->getParent()) ||
269 (L->contains(TheBr->getSuccessor(0)) &&
270 L->contains(TheBr->getSuccessor(1))))
271 return false;
272
273 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
274 // transform it.
275 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
276 int64_t ExitValue;
277 if (ExitValueVal == nullptr ||
278 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue) ||
279 !isRepresentableAsExactInteger(ExitValueVal, ExitValue))
280 return false;
281
282 // Find new predicate for integer comparison.
284 switch (Compare->getPredicate()) {
285 default: return false; // Unknown comparison.
287 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
289 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
291 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
293 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
295 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
297 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
298 }
299
300 // We convert the floating point induction variable to a signed i32 value if
301 // we can. This is only safe if the comparison will not overflow in a way
302 // that won't be trapped by the integer equivalent operations. Check for this
303 // now.
304 // TODO: We could use i64 if it is native and the range requires it.
305
306 // The start/stride/exit values must all fit in signed i32.
307 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
308 return false;
309
310 // If not actually striding (add x, 0.0), avoid touching the code.
311 if (IncValue == 0)
312 return false;
313
314 // Positive and negative strides have different safety conditions.
315 if (IncValue > 0) {
316 // If we have a positive stride, we require the init to be less than the
317 // exit value.
318 if (InitValue >= ExitValue)
319 return false;
320
321 uint32_t Range = uint32_t(ExitValue-InitValue);
322 // Check for infinite loop, either:
323 // while (i <= Exit) or until (i > Exit)
324 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
325 if (++Range == 0) return false; // Range overflows.
326 }
327
328 unsigned Leftover = Range % uint32_t(IncValue);
329
330 // If this is an equality comparison, we require that the strided value
331 // exactly land on the exit value, otherwise the IV condition will wrap
332 // around and do things the fp IV wouldn't.
333 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
334 Leftover != 0)
335 return false;
336
337 // If the stride would wrap around the i32 before exiting, we can't
338 // transform the IV.
339 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
340 return false;
341 } else {
342 // If we have a negative stride, we require the init to be greater than the
343 // exit value.
344 if (InitValue <= ExitValue)
345 return false;
346
347 uint32_t Range = uint32_t(InitValue-ExitValue);
348 // Check for infinite loop, either:
349 // while (i >= Exit) or until (i < Exit)
350 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
351 if (++Range == 0) return false; // Range overflows.
352 }
353
354 unsigned Leftover = Range % uint32_t(-IncValue);
355
356 // If this is an equality comparison, we require that the strided value
357 // exactly land on the exit value, otherwise the IV condition will wrap
358 // around and do things the fp IV wouldn't.
359 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
360 Leftover != 0)
361 return false;
362
363 // If the stride would wrap around the i32 before exiting, we can't
364 // transform the IV.
365 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
366 return false;
367 }
368
369 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
370
371 // Insert new integer induction variable.
372 PHINode *NewPHI =
373 PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
374 NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, InitValue),
375 PN->getIncomingBlock(IncomingEdge));
376 NewPHI->setDebugLoc(PN->getDebugLoc());
377
378 Instruction *NewAdd = BinaryOperator::CreateAdd(
379 NewPHI, ConstantInt::getSigned(Int32Ty, IncValue),
380 Incr->getName() + ".int", Incr->getIterator());
381 NewAdd->setDebugLoc(Incr->getDebugLoc());
382 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
383
384 ICmpInst *NewCompare = new ICmpInst(
385 TheBr->getIterator(), NewPred, NewAdd,
386 ConstantInt::getSigned(Int32Ty, ExitValue), Compare->getName());
387 NewCompare->setDebugLoc(Compare->getDebugLoc());
388
389 // In the following deletions, PN may become dead and may be deleted.
390 // Use a WeakTrackingVH to observe whether this happens.
391 WeakTrackingVH WeakPH = PN;
392
393 // Delete the old floating point exit comparison. The branch starts using the
394 // new comparison.
395 NewCompare->takeName(Compare);
396 Compare->replaceAllUsesWith(NewCompare);
397 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
398
399 // Delete the old floating point increment.
400 Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
401 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
402
403 // If the FP induction variable still has uses, this is because something else
404 // in the loop uses its value. In order to canonicalize the induction
405 // variable, we chose to eliminate the IV and rewrite it in terms of an
406 // int->fp cast.
407 //
408 // We give preference to sitofp over uitofp because it is faster on most
409 // platforms.
410 if (WeakPH) {
411 Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
412 PN->getParent()->getFirstInsertionPt());
413 Conv->setDebugLoc(PN->getDebugLoc());
414 PN->replaceAllUsesWith(Conv);
415 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
416 }
417 return true;
418}
419
420bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
421 // First step. Check to see if there are any floating-point recurrences.
422 // If there are, change them into integer recurrences, permitting analysis by
423 // the SCEV routines.
424 BasicBlock *Header = L->getHeader();
425
427
428 bool Changed = false;
429 for (WeakTrackingVH &PHI : PHIs)
430 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
431 Changed |= handleFloatingPointIV(L, PN);
432
433 // If the loop previously had floating-point IV, ScalarEvolution
434 // may not have been able to compute a trip count. Now that we've done some
435 // re-writing, the trip count may be computable.
436 if (Changed)
437 SE->forgetLoop(L);
438 return Changed;
439}
440
441//===---------------------------------------------------------------------===//
442// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
443// they will exit at the first iteration.
444//===---------------------------------------------------------------------===//
445
446/// Check to see if this loop has loop invariant conditions which lead to loop
447/// exits. If so, we know that if the exit path is taken, it is at the first
448/// loop iteration. This lets us predict exit values of PHI nodes that live in
449/// loop header.
450bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
451 // Verify the input to the pass is already in LCSSA form.
452 assert(L->isLCSSAForm(*DT));
453
454 SmallVector<BasicBlock *, 8> ExitBlocks;
455 L->getUniqueExitBlocks(ExitBlocks);
456
457 bool MadeAnyChanges = false;
458 for (auto *ExitBB : ExitBlocks) {
459 // If there are no more PHI nodes in this exit block, then no more
460 // values defined inside the loop are used on this path.
461 for (PHINode &PN : ExitBB->phis()) {
462 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
463 IncomingValIdx != E; ++IncomingValIdx) {
464 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
465
466 // Can we prove that the exit must run on the first iteration if it
467 // runs at all? (i.e. early exits are fine for our purposes, but
468 // traces which lead to this exit being taken on the 2nd iteration
469 // aren't.) Note that this is about whether the exit branch is
470 // executed, not about whether it is taken.
471 if (!L->getLoopLatch() ||
472 !DT->dominates(IncomingBB, L->getLoopLatch()))
473 continue;
474
475 // Get condition that leads to the exit path.
476 auto *TermInst = IncomingBB->getTerminator();
477
478 Value *Cond = nullptr;
479 if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
480 // Must be a conditional branch, otherwise the block
481 // should not be in the loop.
482 Cond = BI->getCondition();
483 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
484 Cond = SI->getCondition();
485 else
486 continue;
487
488 if (!L->isLoopInvariant(Cond))
489 continue;
490
491 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
492
493 // Only deal with PHIs in the loop header.
494 if (!ExitVal || ExitVal->getParent() != L->getHeader())
495 continue;
496
497 // If ExitVal is a PHI on the loop header, then we know its
498 // value along this exit because the exit can only be taken
499 // on the first iteration.
500 auto *LoopPreheader = L->getLoopPreheader();
501 assert(LoopPreheader && "Invalid loop");
502 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
503 if (PreheaderIdx != -1) {
504 assert(ExitVal->getParent() == L->getHeader() &&
505 "ExitVal must be in loop header");
506 MadeAnyChanges = true;
507 PN.setIncomingValue(IncomingValIdx,
508 ExitVal->getIncomingValue(PreheaderIdx));
509 SE->forgetValue(&PN);
510 }
511 }
512 }
513 }
514 return MadeAnyChanges;
515}
516
517//===----------------------------------------------------------------------===//
518// IV Widening - Extend the width of an IV to cover its widest uses.
519//===----------------------------------------------------------------------===//
520
521/// Update information about the induction variable that is extended by this
522/// sign or zero extend operation. This is used to determine the final width of
523/// the IV before actually widening it.
524static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
525 ScalarEvolution *SE,
526 const TargetTransformInfo *TTI) {
527 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
528 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
529 return;
530
531 Type *Ty = Cast->getType();
532 uint64_t Width = SE->getTypeSizeInBits(Ty);
533 if (!Cast->getDataLayout().isLegalInteger(Width))
534 return;
535
536 // Check that `Cast` actually extends the induction variable (we rely on this
537 // later). This takes care of cases where `Cast` is extending a truncation of
538 // the narrow induction variable, and thus can end up being narrower than the
539 // "narrow" induction variable.
540 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
541 if (NarrowIVWidth >= Width)
542 return;
543
544 // Cast is either an sext or zext up to this point.
545 // We should not widen an indvar if arithmetics on the wider indvar are more
546 // expensive than those on the narrower indvar. We check only the cost of ADD
547 // because at least an ADD is required to increment the induction variable. We
548 // could compute more comprehensively the cost of all instructions on the
549 // induction variable when necessary.
550 if (TTI &&
551 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
552 TTI->getArithmeticInstrCost(Instruction::Add,
553 Cast->getOperand(0)->getType())) {
554 return;
555 }
556
557 if (!WI.WidestNativeType ||
558 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
560 WI.IsSigned = IsSigned;
561 return;
562 }
563
564 // We extend the IV to satisfy the sign of its user(s), or 'signed'
565 // if there are multiple users with both sign- and zero extensions,
566 // in order not to introduce nondeterministic behaviour based on the
567 // unspecified order of a PHI nodes' users-iterator.
568 WI.IsSigned |= IsSigned;
569}
570
571//===----------------------------------------------------------------------===//
572// Live IV Reduction - Minimize IVs live across the loop.
573//===----------------------------------------------------------------------===//
574
575//===----------------------------------------------------------------------===//
576// Simplification of IV users based on SCEV evaluation.
577//===----------------------------------------------------------------------===//
578
579namespace {
580
581class IndVarSimplifyVisitor : public IVVisitor {
582 ScalarEvolution *SE;
583 const TargetTransformInfo *TTI;
584 PHINode *IVPhi;
585
586public:
587 WideIVInfo WI;
588
589 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
590 const TargetTransformInfo *TTI,
591 const DominatorTree *DTree)
592 : SE(SCEV), TTI(TTI), IVPhi(IV) {
593 DT = DTree;
594 WI.NarrowIV = IVPhi;
595 }
596
597 // Implement the interface used by simplifyUsersOfIV.
598 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
599};
600
601} // end anonymous namespace
602
603/// Iteratively perform simplification on a worklist of IV users. Each
604/// successive simplification may push more users which may themselves be
605/// candidates for simplification.
606///
607/// Sign/Zero extend elimination is interleaved with IV simplification.
608bool IndVarSimplify::simplifyAndExtend(Loop *L,
609 SCEVExpander &Rewriter,
610 LoopInfo *LI) {
612
613 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
614 L->getBlocks()[0]->getModule(), Intrinsic::experimental_guard);
615 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
616
618 llvm::make_pointer_range(L->getHeader()->phis()));
619
620 // Each round of simplification iterates through the SimplifyIVUsers worklist
621 // for all current phis, then determines whether any IVs can be
622 // widened. Widening adds new phis to LoopPhis, inducing another round of
623 // simplification on the wide IVs.
624 bool Changed = false;
625 while (!LoopPhis.empty()) {
626 // Evaluate as many IV expressions as possible before widening any IVs. This
627 // forces SCEV to set no-wrap flags before evaluating sign/zero
628 // extension. The first time SCEV attempts to normalize sign/zero extension,
629 // the result becomes final. So for the most predictable results, we delay
630 // evaluation of sign/zero extend evaluation until needed, and avoid running
631 // other SCEV based analysis prior to simplifyAndExtend.
632 do {
633 PHINode *CurrIV = LoopPhis.pop_back_val();
634
635 // Information about sign/zero extensions of CurrIV.
636 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
637
638 const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
639 Rewriter, &Visitor);
640
641 Changed |= C;
642 RunUnswitching |= U;
643 if (Visitor.WI.WidestNativeType) {
644 WideIVs.push_back(Visitor.WI);
645 }
646 } while(!LoopPhis.empty());
647
648 // Continue if we disallowed widening.
649 if (!WidenIndVars)
650 continue;
651
652 for (; !WideIVs.empty(); WideIVs.pop_back()) {
653 unsigned ElimExt;
654 unsigned Widened;
655 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
656 DT, DeadInsts, ElimExt, Widened,
657 HasGuards, UsePostIncrementRanges)) {
658 NumElimExt += ElimExt;
659 NumWidened += Widened;
660 Changed = true;
661 LoopPhis.push_back(WidePhi);
662 }
663 }
664 }
665 return Changed;
666}
667
668//===----------------------------------------------------------------------===//
669// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
670//===----------------------------------------------------------------------===//
671
672/// Given an Value which is hoped to be part of an add recurance in the given
673/// loop, return the associated Phi node if so. Otherwise, return null. Note
674/// that this is less general than SCEVs AddRec checking.
677 if (!IncI)
678 return nullptr;
679
680 switch (IncI->getOpcode()) {
681 case Instruction::Add:
682 case Instruction::Sub:
683 break;
684 case Instruction::GetElementPtr:
685 // An IV counter must preserve its type.
686 if (IncI->getNumOperands() == 2)
687 break;
688 [[fallthrough]];
689 default:
690 return nullptr;
691 }
692
693 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
694 if (Phi && Phi->getParent() == L->getHeader()) {
695 if (L->isLoopInvariant(IncI->getOperand(1)))
696 return Phi;
697 return nullptr;
698 }
699 if (IncI->getOpcode() == Instruction::GetElementPtr)
700 return nullptr;
701
702 // Allow add/sub to be commuted.
703 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
704 if (Phi && Phi->getParent() == L->getHeader()) {
705 if (L->isLoopInvariant(IncI->getOperand(0)))
706 return Phi;
707 }
708 return nullptr;
709}
710
711/// Whether the current loop exit test is based on this value. Currently this
712/// is limited to a direct use in the loop condition.
713static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
714 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
716 // TODO: Allow non-icmp loop test.
717 if (!ICmp)
718 return false;
719
720 // TODO: Allow indirect use.
721 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
722}
723
724/// linearFunctionTestReplace policy. Return true unless we can show that the
725/// current exit test is already sufficiently canonical.
726static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
727 assert(L->getLoopLatch() && "Must be in simplified form");
728
729 // Avoid converting a constant or loop invariant test back to a runtime
730 // test. This is critical for when SCEV's cached ExitCount is less precise
731 // than the current IR (such as after we've proven a particular exit is
732 // actually dead and thus the BE count never reaches our ExitCount.)
733 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
734 if (L->isLoopInvariant(BI->getCondition()))
735 return false;
736
737 // Do LFTR to simplify the exit condition to an ICMP.
739 if (!Cond)
740 return true;
741
742 // Do LFTR to simplify the exit ICMP to EQ/NE
743 ICmpInst::Predicate Pred = Cond->getPredicate();
744 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
745 return true;
746
747 // Look for a loop invariant RHS
748 Value *LHS = Cond->getOperand(0);
749 Value *RHS = Cond->getOperand(1);
750 if (!L->isLoopInvariant(RHS)) {
751 if (!L->isLoopInvariant(LHS))
752 return true;
753 std::swap(LHS, RHS);
754 }
755 // Look for a simple IV counter LHS
757 if (!Phi)
758 Phi = getLoopPhiForCounter(LHS, L);
759
760 if (!Phi)
761 return true;
762
763 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
764 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
765 if (Idx < 0)
766 return true;
767
768 // Do LFTR if the exit condition's IV is *not* a simple counter.
769 Value *IncV = Phi->getIncomingValue(Idx);
770 return Phi != getLoopPhiForCounter(IncV, L);
771}
772
773/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
774/// down to checking that all operands are constant and listing instructions
775/// that may hide undef.
777 unsigned Depth) {
778 if (isa<Constant>(V))
779 return !isa<UndefValue>(V);
780
781 if (Depth >= 6)
782 return false;
783
784 // Conservatively handle non-constant non-instructions. For example, Arguments
785 // may be undef.
787 if (!I)
788 return false;
789
790 // Load and return values may be undef.
791 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
792 return false;
793
794 // Optimistically handle other instructions.
795 for (Value *Op : I->operands()) {
796 if (!Visited.insert(Op).second)
797 continue;
798 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
799 return false;
800 }
801 return true;
802}
803
804/// Return true if the given value is concrete. We must prove that undef can
805/// never reach it.
806///
807/// TODO: If we decide that this is a good approach to checking for undef, we
808/// may factor it into a common location.
809static bool hasConcreteDef(Value *V) {
811 Visited.insert(V);
812 return hasConcreteDefImpl(V, Visited, 0);
813}
814
815/// Return true if the given phi is a "counter" in L. A counter is an
816/// add recurance (of integer or pointer type) with an arbitrary start, and a
817/// step of 1. Note that L must have exactly one latch.
818static bool isLoopCounter(PHINode* Phi, Loop *L,
819 ScalarEvolution *SE) {
820 assert(Phi->getParent() == L->getHeader());
821 assert(L->getLoopLatch());
822
823 if (!SE->isSCEVable(Phi->getType()))
824 return false;
825
826 const SCEV *S = SE->getSCEV(Phi);
828 return false;
829
830 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
831 Value *IncV = Phi->getIncomingValue(LatchIdx);
832 return (getLoopPhiForCounter(IncV, L) == Phi &&
833 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
834}
835
836/// Search the loop header for a loop counter (anadd rec w/step of one)
837/// suitable for use by LFTR. If multiple counters are available, select the
838/// "best" one based profitable heuristics.
839///
840/// BECount may be an i8* pointer type. The pointer difference is already
841/// valid count without scaling the address stride, so it remains a pointer
842/// expression as far as SCEV is concerned.
843static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
844 const SCEV *BECount,
846 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
847
848 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
849
850 // Loop over all of the PHI nodes, looking for a simple counter.
851 PHINode *BestPhi = nullptr;
852 const SCEV *BestInit = nullptr;
853 BasicBlock *LatchBlock = L->getLoopLatch();
854 assert(LatchBlock && "Must be in simplified form");
855 const DataLayout &DL = L->getHeader()->getDataLayout();
856
857 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
858 PHINode *Phi = cast<PHINode>(I);
859 if (!isLoopCounter(Phi, L, SE))
860 continue;
861
862 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
863
864 // AR may be a pointer type, while BECount is an integer type.
865 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
866 // AR may not be a narrower type, or we may never exit.
867 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
868 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
869 continue;
870
871 // Avoid reusing a potentially undef value to compute other values that may
872 // have originally had a concrete definition.
873 if (!hasConcreteDef(Phi)) {
874 // We explicitly allow unknown phis as long as they are already used by
875 // the loop exit test. This is legal since performing LFTR could not
876 // increase the number of undef users.
877 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
878 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
879 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
880 continue;
881 }
882
883 // Avoid introducing undefined behavior due to poison which didn't exist in
884 // the original program. (Annoyingly, the rules for poison and undef
885 // propagation are distinct, so this does NOT cover the undef case above.)
886 // We have to ensure that we don't introduce UB by introducing a use on an
887 // iteration where said IV produces poison. Our strategy here differs for
888 // pointers and integer IVs. For integers, we strip and reinfer as needed,
889 // see code in linearFunctionTestReplace. For pointers, we restrict
890 // transforms as there is no good way to reinfer inbounds once lost.
891 if (!Phi->getType()->isIntegerTy() &&
892 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
893 continue;
894
895 const SCEV *Init = AR->getStart();
896
897 if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
898 // Don't force a live loop counter if another IV can be used.
899 if (isAlmostDeadIV(Phi, LatchBlock, Cond))
900 continue;
901
902 // Prefer to count-from-zero. This is a more "canonical" counter form. It
903 // also prefers integer to pointer IVs.
904 if (BestInit->isZero() != Init->isZero()) {
905 if (BestInit->isZero())
906 continue;
907 }
908 // If two IVs both count from zero or both count from nonzero then the
909 // narrower is likely a dead phi that has been widened. Use the wider phi
910 // to allow the other to be eliminated.
911 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
912 continue;
913 }
914 BestPhi = Phi;
915 BestInit = Init;
916 }
917 return BestPhi;
918}
919
920/// Insert an IR expression which computes the value held by the IV IndVar
921/// (which must be an loop counter w/unit stride) after the backedge of loop L
922/// is taken ExitCount times.
923static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
924 const SCEV *ExitCount, bool UsePostInc, Loop *L,
925 SCEVExpander &Rewriter, ScalarEvolution *SE) {
926 assert(isLoopCounter(IndVar, L, SE));
927 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
928 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
929 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
930
931 // For integer IVs, truncate the IV before computing the limit unless we
932 // know apriori that the limit must be a constant when evaluated in the
933 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
934 // IV in the loop over a (potentially) expensive expansion of the widened
935 // exit count add(zext(add)) expression.
936 if (IndVar->getType()->isIntegerTy() &&
937 SE->getTypeSizeInBits(AR->getType()) >
938 SE->getTypeSizeInBits(ExitCount->getType())) {
939 const SCEV *IVInit = AR->getStart();
940 if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
941 AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
942 }
943
944 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
945 const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
946 assert(SE->isLoopInvariant(IVLimit, L) &&
947 "Computed iteration count is not loop invariant!");
948 return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
949 ExitingBB->getTerminator());
950}
951
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
///
/// \p ExitingBB  block whose conditional branch is rewritten.
/// \p ExitCount  number of times the backedge is taken before this exit.
/// \p IndVar     a unit-stride loop counter phi (see isLoopCounter).
/// \returns true; the branch condition is always rewritten.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  // The value the counter phi receives from the latch, i.e. the IV after one
  // increment (its post-inc form).
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  // Successor 0 in-loop means the branch exits on 'false', so keep looping
  // while the IV is != the limit; otherwise the branch exits on 'true'.
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    // zext(trunc(IV)) == IV means the IV's value fits the narrow type
    // unsigned; sext(trunc(IV)) == IV means it fits signed.
    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1093
1094static void replaceExitCond(BranchInst *BI, Value *NewCond,
1096 auto *OldCond = BI->getCondition();
1097 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1098 << " with " << *NewCond << "\n");
1099 BI->setCondition(NewCond);
1100 if (OldCond->use_empty())
1101 DeadInsts.emplace_back(OldCond);
1102}
1103
1104static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1105 bool IsTaken) {
1106 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1107 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1108 auto *OldCond = BI->getCondition();
1109 return ConstantInt::get(OldCond->getType(),
1110 IsTaken ? ExitIfTrue : !ExitIfTrue);
1111}
1112
1113static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1115 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1116 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1117 replaceExitCond(BI, NewCond, DeadInsts);
1118}
1119
1121 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1122 ScalarEvolution &SE) {
1123 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1124 auto *LoopPreheader = L->getLoopPreheader();
1125 auto *LoopHeader = L->getHeader();
1127 for (auto &PN : LoopHeader->phis()) {
1128 auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1129 for (User *U : PN.users())
1130 Worklist.push_back(cast<Instruction>(U));
1131 SE.forgetValue(&PN);
1132 PN.replaceAllUsesWith(PreheaderIncoming);
1133 DeadInsts.emplace_back(&PN);
1134 }
1135
1136 // Replacing with the preheader value will often allow IV users to simplify
1137 // (especially if the preheader value is a constant).
1139 while (!Worklist.empty()) {
1140 auto *I = cast<Instruction>(Worklist.pop_back_val());
1141 if (!Visited.insert(I).second)
1142 continue;
1143
1144 // Don't simplify instructions outside the loop.
1145 if (!L->contains(I))
1146 continue;
1147
1148 Value *Res = simplifyInstruction(I, I->getDataLayout());
1149 if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1150 for (User *U : I->users())
1151 Worklist.push_back(cast<Instruction>(U));
1152 I->replaceAllUsesWith(Res);
1153 DeadInsts.emplace_back(I);
1154 }
1155 }
1156}
1157
1158static Value *
1161 SCEVExpander &Rewriter) {
1162 ICmpInst::Predicate InvariantPred = LIP.Pred;
1163 BasicBlock *Preheader = L->getLoopPreheader();
1164 assert(Preheader && "Preheader doesn't exist");
1165 Rewriter.setInsertPoint(Preheader->getTerminator());
1166 auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1167 auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1168 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1169 if (ExitIfTrue)
1170 InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1171 IRBuilder<> Builder(Preheader->getTerminator());
1172 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1173 return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1174 BI->getCondition()->getName());
1175}
1176
1177static std::optional<Value *>
1178createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1179 const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1180 ScalarEvolution *SE, SCEVExpander &Rewriter) {
1181 CmpPredicate Pred = ICmp->getCmpPredicate();
1182 Value *LHS = ICmp->getOperand(0);
1183 Value *RHS = ICmp->getOperand(1);
1184
1185 // 'LHS pred RHS' should now mean that we stay in loop.
1186 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1187 if (Inverted)
1189
1190 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1191 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1192 // Can we prove it to be trivially true or false?
1193 if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1194 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1195
1196 auto *ARTy = LHSS->getType();
1197 auto *MaxIterTy = MaxIter->getType();
1198 // If possible, adjust types.
1199 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1200 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1201 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1202 const SCEV *MinusOne = SE->getMinusOne(ARTy);
1203 const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1204 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1205 MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1206 }
1207
1208 if (SkipLastIter) {
1209 // Semantically skip last iter is "subtract 1, do not bother about unsigned
1210 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1211 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1212 // So we manually construct umin(a - 1, b - 1).
1214 if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1215 for (const SCEV *Op : UMin->operands())
1216 Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1217 MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1218 } else
1219 MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1220 }
1221
1222 // Check if there is a loop-invariant predicate equivalent to our check.
1223 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1224 L, BI, MaxIter);
1225 if (!LIP)
1226 return std::nullopt;
1227
1228 // Can we prove it to be trivially true?
1229 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1230 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1231 else
1232 return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1233}
1234
1236 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1237 bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1239 assert(
1240 (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1241 "Not a loop exit!");
1242
1243 // For branch that stays in loop by TRUE condition, go through AND. For branch
1244 // that stays in loop by FALSE condition, go through OR. Both gives the
1245 // similar logic: "stay in loop iff all conditions are true(false)".
1246 bool Inverted = L->contains(BI->getSuccessor(1));
1247 SmallVector<ICmpInst *, 4> LeafConditions;
1248 SmallVector<Value *, 4> Worklist;
1250 Value *OldCond = BI->getCondition();
1251 Visited.insert(OldCond);
1252 Worklist.push_back(OldCond);
1253
1254 auto GoThrough = [&](Value *V) {
1255 Value *LHS = nullptr, *RHS = nullptr;
1256 if (Inverted) {
1257 if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1258 return false;
1259 } else {
1260 if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1261 return false;
1262 }
1263 if (Visited.insert(LHS).second)
1264 Worklist.push_back(LHS);
1265 if (Visited.insert(RHS).second)
1266 Worklist.push_back(RHS);
1267 return true;
1268 };
1269
1270 do {
1271 Value *Curr = Worklist.pop_back_val();
1272 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1273 // those with one use, to avoid instruction duplication.
1274 if (Curr->hasOneUse())
1275 if (!GoThrough(Curr))
1276 if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1277 LeafConditions.push_back(ICmp);
1278 } while (!Worklist.empty());
1279
1280 // If the current basic block has the same exit count as the whole loop, and
1281 // it consists of multiple icmp's, try to collect all icmp's that give exact
1282 // same exit count. For all other icmp's, we could use one less iteration,
1283 // because their value on the last iteration doesn't really matter.
1284 SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1285 if (!SkipLastIter && LeafConditions.size() > 1 &&
1286 SE->getExitCount(L, ExitingBB,
1288 MaxIter)
1289 for (auto *ICmp : LeafConditions) {
1290 auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1291 /*ControlsExit*/ false);
1292 const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
1293 if (isa<SCEVCouldNotCompute>(ExitMax))
1294 continue;
1295 // They could be of different types (specifically this happens after
1296 // IV widening).
1297 auto *WiderType =
1298 SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1299 const SCEV *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1300 const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1301 if (WideExitMax == WideMaxIter)
1302 ICmpsFailingOnLastIter.insert(ICmp);
1303 }
1304
1305 bool Changed = false;
1306 for (auto *OldCond : LeafConditions) {
1307 // Skip last iteration for this icmp under one of two conditions:
1308 // - We do it for all conditions;
1309 // - There is another ICmp that would fail on last iter, so this one doesn't
1310 // really matter.
1311 bool OptimisticSkipLastIter = SkipLastIter;
1312 if (!OptimisticSkipLastIter) {
1313 if (ICmpsFailingOnLastIter.size() > 1)
1314 OptimisticSkipLastIter = true;
1315 else if (ICmpsFailingOnLastIter.size() == 1)
1316 OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1317 }
1318 if (auto Replaced =
1319 createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1320 OptimisticSkipLastIter, SE, Rewriter)) {
1321 Changed = true;
1322 auto *NewCond = *Replaced;
1323 if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1324 NCI->setName(OldCond->getName() + ".first_iter");
1325 }
1326 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1327 << " with " << *NewCond << "\n");
1328 assert(OldCond->hasOneUse() && "Must be!");
1329 OldCond->replaceAllUsesWith(NewCond);
1330 DeadInsts.push_back(OldCond);
1331 // Make sure we no longer consider this condition as failing on last
1332 // iteration.
1333 ICmpsFailingOnLastIter.erase(OldCond);
1334 }
1335 }
1336 return Changed;
1337}
1338
/// Turn signed exit compares of zext'd values into their unsigned form, and
/// rotate zexts out of loop-exit compares (icmp zext(X), RHS ->
/// icmp X, trunc(RHS)) so the zext no longer blocks trip-count computation.
/// Returns true if any compare was rewritten.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers.  The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results.  For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions.  The exception is when we have a zext(add-rec).  The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant.  This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, RHS, LHSOp->getType(), "",
          L->getLoopPreheader()->getTerminator()->getIterator());
      // NewRHS is an operation that has been hoisted out of the loop, and
      // therefore should have a dropped location.
      NewRHS->setDebugLoc(DebugLoc::getDropped());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      // Samesign flag cannot be preserved after narrowing the compare.
      ICmp->setSameSign(false);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1474
1475bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1476 SmallVector<BasicBlock*, 16> ExitingBlocks;
1477 L->getExitingBlocks(ExitingBlocks);
1478
1479 // Remove all exits which aren't both rewriteable and execute on every
1480 // iteration.
1481 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1482 // If our exitting block exits multiple loops, we can only rewrite the
1483 // innermost one. Otherwise, we're changing how many times the innermost
1484 // loop runs before it exits.
1485 if (LI->getLoopFor(ExitingBB) != L)
1486 return true;
1487
1488 // Can't rewrite non-branch yet.
1489 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1490 if (!BI)
1491 return true;
1492
1493 // Likewise, the loop latch must be dominated by the exiting BB.
1494 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1495 return true;
1496
1497 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1498 // If already constant, nothing to do. However, if this is an
1499 // unconditional exit, we can still replace header phis with their
1500 // preheader value.
1501 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1502 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1503 return true;
1504 }
1505
1506 return false;
1507 });
1508
1509 if (ExitingBlocks.empty())
1510 return false;
1511
1512 // Get a symbolic upper bound on the loop backedge taken count.
1513 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1514 if (isa<SCEVCouldNotCompute>(MaxBECount))
1515 return false;
1516
1517 // Visit our exit blocks in order of dominance. We know from the fact that
1518 // all exits must dominate the latch, so there is a total dominance order
1519 // between them.
1520 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1521 // std::sort sorts in ascending order, so we want the inverse of
1522 // the normal dominance relation.
1523 if (A == B) return false;
1524 if (DT->properlyDominates(A, B))
1525 return true;
1526 else {
1527 assert(DT->properlyDominates(B, A) &&
1528 "expected total dominance order!");
1529 return false;
1530 }
1531 });
1532#ifdef ASSERT
1533 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1534 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1535 }
1536#endif
1537
1538 bool Changed = false;
1539 bool SkipLastIter = false;
1540 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1541 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1542 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1543 return;
1544 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1545 CurrMaxExit = MaxExitCount;
1546 else
1547 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1548 // If the loop has more than 1 iteration, all further checks will be
1549 // executed 1 iteration less.
1550 if (CurrMaxExit == MaxBECount)
1551 SkipLastIter = true;
1552 };
1553 SmallPtrSet<const SCEV *, 8> DominatingExactExitCounts;
1554 for (BasicBlock *ExitingBB : ExitingBlocks) {
1555 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1556 const SCEV *MaxExitCount = SE->getExitCount(
1557 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1558 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1559 // Okay, we do not know the exit count here. Can we at least prove that it
1560 // will remain the same within iteration space?
1561 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1562 auto OptimizeCond = [&](bool SkipLastIter) {
1563 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1564 MaxBECount, SkipLastIter,
1565 SE, Rewriter, DeadInsts);
1566 };
1567
1568 // TODO: We might have proved that we can skip the last iteration for
1569 // this check. In this case, we only want to check the condition on the
1570 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1571 // corner case:
1572 //
1573 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1574 //
1575 // If we could not prove that len != 0, then we also could not prove that
1576 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1577 // OptimizeCond will likely not prove anything for it, even if it could
1578 // prove the same fact for len.
1579 //
1580 // As a temporary solution, we query both last and pre-last iterations in
1581 // hope that we will be able to prove triviality for at least one of
1582 // them. We can stop querying MaxBECount for this case once SCEV
1583 // understands that (MaxBECount - 1) will not overflow here.
1584 if (OptimizeCond(false))
1585 Changed = true;
1586 else if (SkipLastIter && OptimizeCond(true))
1587 Changed = true;
1588 UpdateSkipLastIter(MaxExitCount);
1589 continue;
1590 }
1591
1592 UpdateSkipLastIter(ExactExitCount);
1593
1594 // If we know we'd exit on the first iteration, rewrite the exit to
1595 // reflect this. This does not imply the loop must exit through this
1596 // exit; there may be an earlier one taken on the first iteration.
1597 // We know that the backedge can't be taken, so we replace all
1598 // the header PHIs with values coming from the preheader.
1599 if (ExactExitCount->isZero()) {
1600 foldExit(L, ExitingBB, true, DeadInsts);
1601 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1602 Changed = true;
1603 continue;
1604 }
1605
1606 assert(ExactExitCount->getType()->isIntegerTy() &&
1607 MaxBECount->getType()->isIntegerTy() &&
1608 "Exit counts must be integers");
1609
1610 Type *WiderType =
1611 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1612 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1613 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1614 assert(MaxBECount->getType() == ExactExitCount->getType());
1615
1616 // Can we prove that some other exit must be taken strictly before this
1617 // one?
1618 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1619 ExactExitCount)) {
1620 foldExit(L, ExitingBB, false, DeadInsts);
1621 Changed = true;
1622 continue;
1623 }
1624
1625 // As we run, keep track of which exit counts we've encountered. If we
1626 // find a duplicate, we've found an exit which would have exited on the
1627 // exiting iteration, but (from the visit order) strictly follows another
1628 // which does the same and is thus dead.
1629 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1630 foldExit(L, ExitingBB, false, DeadInsts);
1631 Changed = true;
1632 continue;
1633 }
1634
1635      // TODO: There might be another opportunity to leverage SCEV's reasoning
1636      // here. If we kept track of the min of dominating exits so far, we could
1637      // discharge exits with EC >= MDEC. This is less powerful than the existing
1638      // transform (since later exits aren't considered), but potentially more
1639      // powerful for any case where SCEV can prove a >=u b, but neither a == b
1640      // or a >u b. Such a case is not currently known.
1641 }
1642 return Changed;
1643}
1644
1645static bool crashingBBWithoutEffect(const BasicBlock &BB) {
1646 return llvm::all_of(BB, [](const Instruction &I) {
1647 // TODO: for now this is overly restrictive, to make sure nothing in this
1648 // BB can depend on the loop body.
1649 // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
1650 // load does not have a side effect, but we could have
1651 // %a = load ptr, ptr %ptr
1652 // %b = load i32, ptr %a
1653 // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
1654 // dereference by skipping over loop iterations.
1655 if (const auto *CB = dyn_cast<CallBase>(&I)) {
1656 if (CB->onlyAccessesInaccessibleMemory())
1657 return true;
1658 }
1659 return isa<UnreachableInst>(I);
1660 });
1661}
1662
1663bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1664 SmallVector<BasicBlock*, 16> ExitingBlocks;
1665 L->getExitingBlocks(ExitingBlocks);
1666
1667 // Finally, see if we can rewrite our exit conditions into a loop invariant
1668 // form. If we have a read-only loop, and we can tell that we must exit down
1669 // a path which does not need any of the values computed within the loop, we
1670 // can rewrite the loop to exit on the first iteration. Note that this
1671 // doesn't either a) tell us the loop exits on the first iteration (unless
1672 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1673 // This transformation looks a lot like a restricted form of dead loop
1674 // elimination, but restricted to read-only loops and without neccesssarily
1675 // needing to kill the loop entirely.
1676 if (!LoopPredication)
1677 return false;
1678
1679 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1680 // through *explicit* control flow. We have to eliminate the possibility of
1681 // implicit exits (see below) before we know it's truly exact.
1682 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1683 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1684 return false;
1685
1686 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1687 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1688
1689 auto BadExit = [&](BasicBlock *ExitingBB) {
1690 // If our exiting block exits multiple loops, we can only rewrite the
1691 // innermost one. Otherwise, we're changing how many times the innermost
1692 // loop runs before it exits.
1693 if (LI->getLoopFor(ExitingBB) != L)
1694 return true;
1695
1696 // Can't rewrite non-branch yet.
1697 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1698 if (!BI)
1699 return true;
1700
1701 // If already constant, nothing to do.
1702 if (isa<Constant>(BI->getCondition()))
1703 return true;
1704
1705 // If the exit block has phis, we need to be able to compute the values
1706 // within the loop which contains them. This assumes trivially lcssa phis
1707 // have already been removed; TODO: generalize
1708 BasicBlock *ExitBlock =
1709 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1710 if (!ExitBlock->phis().empty())
1711 return true;
1712
1713 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1714 if (isa<SCEVCouldNotCompute>(ExitCount) ||
1715 !Rewriter.isSafeToExpand(ExitCount))
1716 return true;
1717
1718 assert(SE->isLoopInvariant(ExitCount, L) &&
1719 "Exit count must be loop invariant");
1720 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1721 return false;
1722 };
1723
1724 // Make sure all exits dominate the latch. This means there is a linear chain
1725 // of exits. We check this before sorting so we have a total order.
1726 BasicBlock *Latch = L->getLoopLatch();
1727 for (BasicBlock *ExitingBB : ExitingBlocks)
1728 if (!DT->dominates(ExitingBB, Latch))
1729 return false;
1730
1731 // If we have any exits which can't be predicated themselves, than we can't
1732 // predicate any exit which isn't guaranteed to execute before it. Consider
1733 // two exits (a) and (b) which would both exit on the same iteration. If we
1734 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1735 // we could convert a loop from exiting through (a) to one exiting through
1736 // (b). Note that this problem exists only for exits with the same exit
1737 // count, and we could be more aggressive when exit counts are known inequal.
1738 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1739 // llvm::sort sorts in ascending order, so we want the inverse of
1740 // the normal dominance relation.
1741 if (A == B)
1742 return false;
1743 if (DT->properlyDominates(A, B))
1744 return true;
1745 if (DT->properlyDominates(B, A))
1746 return false;
1747 llvm_unreachable("Should have total dominance order");
1748 });
1749
1750 // Make sure our exit blocks are really a total order (i.e. a linear chain of
1751 // exits before the backedge).
1752 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1753 assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
1754 "Not sorted by dominance");
1755
1756 // Given our sorted total order, we know that exit[j] must be evaluated
1757 // after all exit[i] such j > i.
1758 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1759 if (BadExit(ExitingBlocks[i])) {
1760 ExitingBlocks.resize(i);
1761 break;
1762 }
1763
1764 if (ExitingBlocks.empty())
1765 return false;
1766
1767 // At this point, ExitingBlocks consists of only those blocks which are
1768 // predicatable. Given that, we know we have at least one exit we can
1769 // predicate if the loop is doesn't have side effects and doesn't have any
1770 // implicit exits (because then our exact BTC isn't actually exact).
1771 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1772 // suggestions on how to improve this? I can obviously bail out for outer
1773 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1774 // is that enough for *all* side effects?
1775 bool HasThreadLocalSideEffects = false;
1776 for (BasicBlock *BB : L->blocks())
1777 for (auto &I : *BB)
1778 // TODO:isGuaranteedToTransfer
1779 if (I.mayHaveSideEffects()) {
1781 return false;
1782 HasThreadLocalSideEffects = true;
1783 if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
1784 // Simple stores cannot be observed by other threads.
1785 // If HasThreadLocalSideEffects is set, we check
1786 // crashingBBWithoutEffect to make sure that the crashing BB cannot
1787 // observe them either.
1788 if (!SI->isSimple())
1789 return false;
1790 } else {
1791 return false;
1792 }
1793 }
1794
1795 bool Changed = false;
1796 // Finally, do the actual predication for all predicatable blocks. A couple
1797 // of notes here:
1798 // 1) We don't bother to constant fold dominated exits with identical exit
1799 // counts; that's simply a form of CSE/equality propagation and we leave
1800 // it for dedicated passes.
1801 // 2) We insert the comparison at the branch. Hoisting introduces additional
1802 // legality constraints and we leave that to dedicated logic. We want to
1803 // predicate even if we can't insert a loop invariant expression as
1804 // peeling or unrolling will likely reduce the cost of the otherwise loop
1805 // varying check.
1806 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1807 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1808 Value *ExactBTCV = nullptr; // Lazily generated if needed.
1809 for (BasicBlock *ExitingBB : ExitingBlocks) {
1810 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1811
1812 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1813 if (HasThreadLocalSideEffects) {
1814 const BasicBlock *Unreachable = nullptr;
1815 for (const BasicBlock *Succ : BI->successors()) {
1816 if (isa<UnreachableInst>(Succ->getTerminator()))
1817 Unreachable = Succ;
1818 }
1819 // Exit BB which have one branch back into the loop and another one to
1820 // a trap can still be optimized, because local side effects cannot
1821 // be observed in the exit case (the trap). We could be smarter about
1822 // this, but for now lets pattern match common cases that directly trap.
1823 if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
1824 return Changed;
1825 }
1826 Value *NewCond;
1827 if (ExitCount == ExactBTC) {
1828 NewCond = L->contains(BI->getSuccessor(0)) ?
1829 B.getFalse() : B.getTrue();
1830 } else {
1831 Value *ECV = Rewriter.expandCodeFor(ExitCount);
1832 if (!ExactBTCV)
1833 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1834 Value *RHS = ExactBTCV;
1835 if (ECV->getType() != RHS->getType()) {
1836 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1837 ECV = B.CreateZExt(ECV, WiderTy);
1838 RHS = B.CreateZExt(RHS, WiderTy);
1839 }
1840 auto Pred = L->contains(BI->getSuccessor(0)) ?
1841 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1842 NewCond = B.CreateICmp(Pred, ECV, RHS);
1843 }
1844 Value *OldCond = BI->getCondition();
1845 BI->setCondition(NewCond);
1846 if (OldCond->use_empty())
1847 DeadInsts.emplace_back(OldCond);
1848 Changed = true;
1849 RunUnswitching = true;
1850 }
1851
1852 return Changed;
1853}
1854
1855//===----------------------------------------------------------------------===//
1856// IndVarSimplify driver. Manage several subpasses of IV simplification.
1857//===----------------------------------------------------------------------===//
1858
/// Top-level driver: runs each IV-simplification subpass in a carefully
/// chosen order on loop \p L. Returns true if the IR was changed. Requires
/// the loop to be in LCSSA and LoopSimplify form.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      if (!Rewriter.isSafeToExpand(ExitCount))
        continue;

      Changed |= linearFunctionTestReplace(L, ExitingBB,
                                           ExitCount, IndVar,
                                           Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}
2017
2020 LPMUpdater &) {
2021 Function *F = L.getHeader()->getParent();
2022 const DataLayout &DL = F->getDataLayout();
2023
2024 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2025 WidenIndVars && AllowIVWidening);
2026 if (!IVS.run(&L))
2027 return PreservedAnalyses::all();
2028
2029 auto PA = getLoopPassPreservedAnalyses();
2030 PA.preserveSet<CFGAnalyses>();
2031 if (IVS.runUnswitching()) {
2033 PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2034 }
2035
2036 if (AR.MSSA)
2037 PA.preserve<MemorySSAAnalysis>();
2038 return PA;
2039}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define DEBUG_TYPE
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
static void replaceExitCond(BranchInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static bool isRepresentableAsExactInteger(ConstantFP *FPVal, int64_t IntVal)
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
static void replaceLoopPHINodesWithPreheaderValues(LoopInfo *LI, Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts, ScalarEvolution &SE)
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static Value * createInvariantCond(const Loop *L, BasicBlock *ExitingBB, const ScalarEvolution::LoopInvariantPredicate &LIP, SCEVExpander &Rewriter)
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
static std::optional< Value * > createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter)
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
static Constant * createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, bool IsTaken)
static cl::opt< bool > LoopPredicationTraps("indvars-predicate-loop-traps", cl::Hidden, cl::init(true), cl::desc("Predicate conditions that trap in loops with only local writes"))
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR.
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
static bool crashingBBWithoutEffect(const BasicBlock &BB)
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:290
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:331
const fltSemantics & getSemantics() const
Definition APFloat.h:1439
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:610
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
const APFloat & getValueAPF() const
Definition Constants.h:320
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition DataLayout.h:229
static DebugLoc getDropped()
Definition DebugLoc.h:164
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
CmpPredicate getInverseCmpPredicate() const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition LoopInfo.h:441
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:936
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:299
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This node represents a polynomial recurrence on the trip count of the specified loop.
LLVM_ABI const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
LLVM_ABI const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
LLVM_ABI bool isOne() const
Return true if the expression is a constant one.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const
LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
LLVM_ABI const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates=false)
Compute the number of times the backedge of the specified loop will execute if its exit condition wer...
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI std::optional< bool > evaluatePredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Check whether the condition described by Pred, LHS, and RHS is true or false in the given Context.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
LLVM_ABI const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
@ SymbolicMaximum
An expression which provides an upper bound on the exact trip count.
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
cst_pred_ty< is_one > m_scev_One()
Match an integer 1.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
std::pair< bool, bool > simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
DWARFExpression::Operation Op
constexpr U AbsoluteValue(T X)
Return the absolute value of a signed integer, converted to the corresponding unsigned integer type.
Definition MathExtras.h:592
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
LLVM_ABI bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition Local.cpp:641
@ UnusedIndVarInLoop
Definition LoopUtils.h:554
@ OnlyCheapRepl
Definition LoopUtils.h:552
@ NeverRepl
Definition LoopUtils.h:551
@ NoHardUse
Definition LoopUtils.h:553
@ AlwaysRepl
Definition LoopUtils.h:555
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A marker analysis to determine if SimpleLoopUnswitch should run again on a given loop.
Collect information about induction variables that are used by sign/zero extend operations.