LLVM 19.0.0git
SimplifyIndVar.cpp
Go to the documentation of this file.
1//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements induction variable simplification. It does
10// not define any actual pass or policy, but provides a single function to
11// simplify a loop's induction variables based on ScalarEvolution.
12//
13//===----------------------------------------------------------------------===//
14
17#include "llvm/ADT/Statistic.h"
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/IRBuilder.h"
25#include "llvm/Support/Debug.h"
30
31using namespace llvm;
32using namespace llvm::PatternMatch;
33
34#define DEBUG_TYPE "indvars"
35
36STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
37STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
38STATISTIC(NumFoldedUser, "Number of IV users folded into a constant");
39STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
41 NumSimplifiedSDiv,
42 "Number of IV signed division operations converted to unsigned division");
44 NumSimplifiedSRem,
45 "Number of IV signed remainder operations converted to unsigned remainder");
46STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
47
48namespace {
49 /// This is a utility for simplifying induction variables
50 /// based on ScalarEvolution. It is the primary instrument of the
51 /// IndvarSimplify pass, but it may also be directly invoked to cleanup after
52 /// other loop passes that preserve SCEV.
53 class SimplifyIndvar {
54 Loop *L;
55 LoopInfo *LI;
57 DominatorTree *DT;
61
62 bool Changed = false;
63
64 public:
65 SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
67 SCEVExpander &Rewriter,
69 : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter),
70 DeadInsts(Dead) {
71 assert(LI && "IV simplification requires LoopInfo");
72 }
73
74 bool hasChanged() const { return Changed; }
75
76 /// Iteratively perform simplification on a worklist of users of the
77 /// specified induction variable. This is the top-level driver that applies
78 /// all simplifications to users of an IV.
79 void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
80
81 Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
82
83 bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
84 bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
85 bool replaceFloatIVWithIntegerIV(Instruction *UseInst);
86
87 bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
88 bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
89 bool eliminateTrunc(TruncInst *TI);
90 bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
91 bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
92 void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
93 void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
94 bool IsSigned);
95 void replaceRemWithNumerator(BinaryOperator *Rem);
96 void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
97 void replaceSRemWithURem(BinaryOperator *Rem);
98 bool eliminateSDiv(BinaryOperator *SDiv);
99 bool strengthenBinaryOp(BinaryOperator *BO, Instruction *IVOperand);
100 bool strengthenOverflowingOperation(BinaryOperator *OBO,
101 Instruction *IVOperand);
102 bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand);
103 };
104}
105
106/// Find a point in code which dominates all given instructions. We can safely
107/// assume that, whatever fact we can prove at the found point, this fact is
108/// also true for each of the given instructions.
110 DominatorTree &DT) {
111 Instruction *CommonDom = nullptr;
112 for (auto *Insn : Instructions)
113 CommonDom =
114 CommonDom ? DT.findNearestCommonDominator(CommonDom, Insn) : Insn;
115 assert(CommonDom && "Common dominator not found?");
116 return CommonDom;
117}
118
119/// Fold an IV operand into its use. This removes increments of an
120/// aligned IV when used by a instruction that ignores the low bits.
121///
122/// IVOperand is guaranteed SCEVable, but UseInst may not be.
123///
124/// Return the operand of IVOperand for this induction variable if IVOperand can
125/// be folded (in case more folding opportunities have been exposed).
126/// Otherwise return null.
127Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
128 Value *IVSrc = nullptr;
129 const unsigned OperIdx = 0;
130 const SCEV *FoldedExpr = nullptr;
131 bool MustDropExactFlag = false;
132 switch (UseInst->getOpcode()) {
133 default:
134 return nullptr;
135 case Instruction::UDiv:
136 case Instruction::LShr:
137 // We're only interested in the case where we know something about
138 // the numerator and have a constant denominator.
139 if (IVOperand != UseInst->getOperand(OperIdx) ||
140 !isa<ConstantInt>(UseInst->getOperand(1)))
141 return nullptr;
142
143 // Attempt to fold a binary operator with constant operand.
144 // e.g. ((I + 1) >> 2) => I >> 2
145 if (!isa<BinaryOperator>(IVOperand)
146 || !isa<ConstantInt>(IVOperand->getOperand(1)))
147 return nullptr;
148
149 IVSrc = IVOperand->getOperand(0);
150 // IVSrc must be the (SCEVable) IV, since the other operand is const.
151 assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
152
153 ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
154 if (UseInst->getOpcode() == Instruction::LShr) {
155 // Get a constant for the divisor. See createSCEV.
156 uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
157 if (D->getValue().uge(BitWidth))
158 return nullptr;
159
160 D = ConstantInt::get(UseInst->getContext(),
161 APInt::getOneBitSet(BitWidth, D->getZExtValue()));
162 }
163 const auto *LHS = SE->getSCEV(IVSrc);
164 const auto *RHS = SE->getSCEV(D);
165 FoldedExpr = SE->getUDivExpr(LHS, RHS);
166 // We might have 'exact' flag set at this point which will no longer be
167 // correct after we make the replacement.
168 if (UseInst->isExact() && LHS != SE->getMulExpr(FoldedExpr, RHS))
169 MustDropExactFlag = true;
170 }
171 // We have something that might fold it's operand. Compare SCEVs.
172 if (!SE->isSCEVable(UseInst->getType()))
173 return nullptr;
174
175 // Bypass the operand if SCEV can prove it has no effect.
176 if (SE->getSCEV(UseInst) != FoldedExpr)
177 return nullptr;
178
179 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
180 << " -> " << *UseInst << '\n');
181
182 UseInst->setOperand(OperIdx, IVSrc);
183 assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
184
185 if (MustDropExactFlag)
186 UseInst->dropPoisonGeneratingFlags();
187
188 ++NumElimOperand;
189 Changed = true;
190 if (IVOperand->use_empty())
191 DeadInsts.emplace_back(IVOperand);
192 return IVSrc;
193}
194
195bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
196 Instruction *IVOperand) {
197 auto *Preheader = L->getLoopPreheader();
198 if (!Preheader)
199 return false;
200 unsigned IVOperIdx = 0;
201 ICmpInst::Predicate Pred = ICmp->getPredicate();
202 if (IVOperand != ICmp->getOperand(0)) {
203 // Swapped
204 assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
205 IVOperIdx = 1;
206 Pred = ICmpInst::getSwappedPredicate(Pred);
207 }
208
209 // Get the SCEVs for the ICmp operands (in the specific context of the
210 // current loop)
211 const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
212 const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
213 const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
214 auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L, ICmp);
215 if (!LIP)
216 return false;
217 ICmpInst::Predicate InvariantPredicate = LIP->Pred;
218 const SCEV *InvariantLHS = LIP->LHS;
219 const SCEV *InvariantRHS = LIP->RHS;
220
221 // Do not generate something ridiculous.
222 auto *PHTerm = Preheader->getTerminator();
223 if (Rewriter.isHighCostExpansion({InvariantLHS, InvariantRHS}, L,
224 2 * SCEVCheapExpansionBudget, TTI, PHTerm) ||
225 !Rewriter.isSafeToExpandAt(InvariantLHS, PHTerm) ||
226 !Rewriter.isSafeToExpandAt(InvariantRHS, PHTerm))
227 return false;
228 auto *NewLHS =
229 Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm);
230 auto *NewRHS =
231 Rewriter.expandCodeFor(InvariantRHS, IVOperand->getType(), PHTerm);
232 LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
233 ICmp->setPredicate(InvariantPredicate);
234 ICmp->setOperand(0, NewLHS);
235 ICmp->setOperand(1, NewRHS);
236 return true;
237}
238
239/// SimplifyIVUsers helper for eliminating useless
240/// comparisons against an induction variable.
241void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
242 Instruction *IVOperand) {
243 unsigned IVOperIdx = 0;
244 ICmpInst::Predicate Pred = ICmp->getPredicate();
245 ICmpInst::Predicate OriginalPred = Pred;
246 if (IVOperand != ICmp->getOperand(0)) {
247 // Swapped
248 assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
249 IVOperIdx = 1;
250 Pred = ICmpInst::getSwappedPredicate(Pred);
251 }
252
253 // Get the SCEVs for the ICmp operands (in the specific context of the
254 // current loop)
255 const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
256 const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
257 const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
258
259 // If the condition is always true or always false in the given context,
260 // replace it with a constant value.
262 for (auto *U : ICmp->users())
263 Users.push_back(cast<Instruction>(U));
264 const Instruction *CtxI = findCommonDominator(Users, *DT);
265 if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) {
266 SE->forgetValue(ICmp);
268 DeadInsts.emplace_back(ICmp);
269 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
270 } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
271 // fallthrough to end of function
272 } else if (ICmpInst::isSigned(OriginalPred) &&
273 SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
274 // If we were unable to make anything above, all we can is to canonicalize
275 // the comparison hoping that it will open the doors for other
276 // optimizations. If we find out that we compare two non-negative values,
277 // we turn the instruction's predicate to its unsigned version. Note that
278 // we cannot rely on Pred here unless we check if we have swapped it.
279 assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
280 LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
281 << '\n');
282 ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
283 } else
284 return;
285
286 ++NumElimCmp;
287 Changed = true;
288}
289
290bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
291 // Get the SCEVs for the ICmp operands.
292 auto *N = SE->getSCEV(SDiv->getOperand(0));
293 auto *D = SE->getSCEV(SDiv->getOperand(1));
294
295 // Simplify unnecessary loops away.
296 const Loop *L = LI->getLoopFor(SDiv->getParent());
297 N = SE->getSCEVAtScope(N, L);
298 D = SE->getSCEVAtScope(D, L);
299
300 // Replace sdiv by udiv if both of the operands are non-negative
301 if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
302 auto *UDiv = BinaryOperator::Create(
303 BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
304 SDiv->getName() + ".udiv", SDiv->getIterator());
305 UDiv->setIsExact(SDiv->isExact());
306 SDiv->replaceAllUsesWith(UDiv);
307 LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
308 ++NumSimplifiedSDiv;
309 Changed = true;
310 DeadInsts.push_back(SDiv);
311 return true;
312 }
313
314 return false;
315}
316
317// i %s n -> i %u n if i >= 0 and n >= 0
318void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {
319 auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
320 auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D,
321 Rem->getName() + ".urem", Rem->getIterator());
322 Rem->replaceAllUsesWith(URem);
323 LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');
324 ++NumSimplifiedSRem;
325 Changed = true;
326 DeadInsts.emplace_back(Rem);
327}
328
329// i % n --> i if i is in [0,n).
330void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {
331 Rem->replaceAllUsesWith(Rem->getOperand(0));
332 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
333 ++NumElimRem;
334 Changed = true;
335 DeadInsts.emplace_back(Rem);
336}
337
338// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
339void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
340 auto *T = Rem->getType();
341 auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
342 ICmpInst *ICmp = new ICmpInst(Rem->getIterator(), ICmpInst::ICMP_EQ, N, D);
343 SelectInst *Sel =
344 SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem->getIterator());
345 Rem->replaceAllUsesWith(Sel);
346 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
347 ++NumElimRem;
348 Changed = true;
349 DeadInsts.emplace_back(Rem);
350}
351
352/// SimplifyIVUsers helper for eliminating useless remainder operations
353/// operating on an induction variable or replacing srem by urem.
354void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem,
355 Instruction *IVOperand,
356 bool IsSigned) {
357 auto *NValue = Rem->getOperand(0);
358 auto *DValue = Rem->getOperand(1);
359 // We're only interested in the case where we know something about
360 // the numerator, unless it is a srem, because we want to replace srem by urem
361 // in general.
362 bool UsedAsNumerator = IVOperand == NValue;
363 if (!UsedAsNumerator && !IsSigned)
364 return;
365
366 const SCEV *N = SE->getSCEV(NValue);
367
368 // Simplify unnecessary loops away.
369 const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
370 N = SE->getSCEVAtScope(N, ICmpLoop);
371
372 bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N);
373
374 // Do not proceed if the Numerator may be negative
375 if (!IsNumeratorNonNegative)
376 return;
377
378 const SCEV *D = SE->getSCEV(DValue);
379 D = SE->getSCEVAtScope(D, ICmpLoop);
380
381 if (UsedAsNumerator) {
382 auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
383 if (SE->isKnownPredicate(LT, N, D)) {
384 replaceRemWithNumerator(Rem);
385 return;
386 }
387
388 auto *T = Rem->getType();
389 const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T));
390 if (SE->isKnownPredicate(LT, NLessOne, D)) {
391 replaceRemWithNumeratorOrZero(Rem);
392 return;
393 }
394 }
395
396 // Try to replace SRem with URem, if both N and D are known non-negative.
397 // Since we had already check N, we only need to check D now
398 if (!IsSigned || !SE->isKnownNonNegative(D))
399 return;
400
401 replaceSRemWithURem(Rem);
402}
403
404bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
405 const SCEV *LHS = SE->getSCEV(WO->getLHS());
406 const SCEV *RHS = SE->getSCEV(WO->getRHS());
407 if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
408 return false;
409
410 // Proved no overflow, nuke the overflow check and, if possible, the overflow
411 // intrinsic as well.
412
414 WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO->getIterator());
415
416 if (WO->isSigned())
417 NewResult->setHasNoSignedWrap(true);
418 else
419 NewResult->setHasNoUnsignedWrap(true);
420
422
423 for (auto *U : WO->users()) {
424 if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
425 if (EVI->getIndices()[0] == 1)
426 EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext()));
427 else {
428 assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
429 EVI->replaceAllUsesWith(NewResult);
430 }
431 ToDelete.push_back(EVI);
432 }
433 }
434
435 for (auto *EVI : ToDelete)
436 EVI->eraseFromParent();
437
438 if (WO->use_empty())
439 WO->eraseFromParent();
440
441 Changed = true;
442 return true;
443}
444
445bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) {
446 const SCEV *LHS = SE->getSCEV(SI->getLHS());
447 const SCEV *RHS = SE->getSCEV(SI->getRHS());
448 if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
449 return false;
450
452 SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI->getIterator());
453 if (SI->isSigned())
454 BO->setHasNoSignedWrap();
455 else
457
458 SI->replaceAllUsesWith(BO);
459 DeadInsts.emplace_back(SI);
460 Changed = true;
461 return true;
462}
463
464bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
465 // It is always legal to replace
466 // icmp <pred> i32 trunc(iv), n
467 // with
468 // icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate.
469 // Or with
470 // icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate.
471 // Or with either of these if pred is an equality predicate.
472 //
473 // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
474 // every comparison which uses trunc, it means that we can replace each of
475 // them with comparison of iv against sext/zext(n). We no longer need trunc
476 // after that.
477 //
478 // TODO: Should we do this if we can widen *some* comparisons, but not all
479 // of them? Sometimes it is enough to enable other optimizations, but the
480 // trunc instruction will stay in the loop.
481 Value *IV = TI->getOperand(0);
482 Type *IVTy = IV->getType();
483 const SCEV *IVSCEV = SE->getSCEV(IV);
484 const SCEV *TISCEV = SE->getSCEV(TI);
485
486 // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
487 // get rid of trunc
488 bool DoesSExtCollapse = false;
489 bool DoesZExtCollapse = false;
490 if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy))
491 DoesSExtCollapse = true;
492 if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy))
493 DoesZExtCollapse = true;
494
495 // If neither sext nor zext does collapse, it is not profitable to do any
496 // transform. Bail.
497 if (!DoesSExtCollapse && !DoesZExtCollapse)
498 return false;
499
500 // Collect users of the trunc that look like comparisons against invariants.
501 // Bail if we find something different.
503 for (auto *U : TI->users()) {
504 // We don't care about users in unreachable blocks.
505 if (isa<Instruction>(U) &&
506 !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
507 continue;
508 ICmpInst *ICI = dyn_cast<ICmpInst>(U);
509 if (!ICI) return false;
510 assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
511 if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
512 !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
513 return false;
514 // If we cannot get rid of trunc, bail.
515 if (ICI->isSigned() && !DoesSExtCollapse)
516 return false;
517 if (ICI->isUnsigned() && !DoesZExtCollapse)
518 return false;
519 // For equality, either signed or unsigned works.
520 ICmpUsers.push_back(ICI);
521 }
522
523 auto CanUseZExt = [&](ICmpInst *ICI) {
524 // Unsigned comparison can be widened as unsigned.
525 if (ICI->isUnsigned())
526 return true;
527 // Is it profitable to do zext?
528 if (!DoesZExtCollapse)
529 return false;
530 // For equality, we can safely zext both parts.
531 if (ICI->isEquality())
532 return true;
533 // Otherwise we can only use zext when comparing two non-negative or two
534 // negative values. But in practice, we will never pass DoesZExtCollapse
535 // check for a negative value, because zext(trunc(x)) is non-negative. So
536 // it only make sense to check for non-negativity here.
537 const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0));
538 const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1));
539 return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2);
540 };
541 // Replace all comparisons against trunc with comparisons against IV.
542 for (auto *ICI : ICmpUsers) {
543 bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
544 auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
545 IRBuilder<> Builder(ICI);
546 Value *Ext = nullptr;
547 // For signed/unsigned predicate, replace the old comparison with comparison
548 // of immediate IV against sext/zext of the invariant argument. If we can
549 // use either sext or zext (i.e. we are dealing with equality predicate),
550 // then prefer zext as a more canonical form.
551 // TODO: If we see a signed comparison which can be turned into unsigned,
552 // we can do it here for canonicalization purposes.
553 ICmpInst::Predicate Pred = ICI->getPredicate();
554 if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
555 if (CanUseZExt(ICI)) {
556 assert(DoesZExtCollapse && "Unprofitable zext?");
557 Ext = Builder.CreateZExt(Op1, IVTy, "zext");
559 } else {
560 assert(DoesSExtCollapse && "Unprofitable sext?");
561 Ext = Builder.CreateSExt(Op1, IVTy, "sext");
562 assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
563 }
564 bool Changed;
565 L->makeLoopInvariant(Ext, Changed);
566 (void)Changed;
567 auto *NewCmp = Builder.CreateICmp(Pred, IV, Ext);
568 ICI->replaceAllUsesWith(NewCmp);
569 DeadInsts.emplace_back(ICI);
570 }
571
572 // Trunc no longer needed.
574 DeadInsts.emplace_back(TI);
575 return true;
576}
577
578/// Eliminate an operation that consumes a simple IV and has no observable
579/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
580/// but UseInst may not be.
581bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
582 Instruction *IVOperand) {
583 if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
584 eliminateIVComparison(ICmp, IVOperand);
585 return true;
586 }
587 if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) {
588 bool IsSRem = Bin->getOpcode() == Instruction::SRem;
589 if (IsSRem || Bin->getOpcode() == Instruction::URem) {
590 simplifyIVRemainder(Bin, IVOperand, IsSRem);
591 return true;
592 }
593
594 if (Bin->getOpcode() == Instruction::SDiv)
595 return eliminateSDiv(Bin);
596 }
597
598 if (auto *WO = dyn_cast<WithOverflowInst>(UseInst))
599 if (eliminateOverflowIntrinsic(WO))
600 return true;
601
602 if (auto *SI = dyn_cast<SaturatingInst>(UseInst))
603 if (eliminateSaturatingIntrinsic(SI))
604 return true;
605
606 if (auto *TI = dyn_cast<TruncInst>(UseInst))
607 if (eliminateTrunc(TI))
608 return true;
609
610 if (eliminateIdentitySCEV(UseInst, IVOperand))
611 return true;
612
613 return false;
614}
615
617 if (auto *BB = L->getLoopPreheader())
618 return BB->getTerminator();
619
620 return Hint;
621}
622
623/// Replace the UseInst with a loop invariant expression if it is safe.
624bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
625 if (!SE->isSCEVable(I->getType()))
626 return false;
627
628 // Get the symbolic expression for this instruction.
629 const SCEV *S = SE->getSCEV(I);
630
631 if (!SE->isLoopInvariant(S, L))
632 return false;
633
634 // Do not generate something ridiculous even if S is loop invariant.
635 if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I))
636 return false;
637
638 auto *IP = GetLoopInvariantInsertPosition(L, I);
639
640 if (!Rewriter.isSafeToExpandAt(S, IP)) {
641 LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I
642 << " with non-speculable loop invariant: " << *S << '\n');
643 return false;
644 }
645
646 auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);
647 bool NeedToEmitLCSSAPhis = false;
648 if (!LI->replacementPreservesLCSSAForm(I, Invariant))
649 NeedToEmitLCSSAPhis = true;
650
651 I->replaceAllUsesWith(Invariant);
652 LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
653 << " with loop invariant: " << *S << '\n');
654
655 if (NeedToEmitLCSSAPhis) {
656 SmallVector<Instruction *, 1> NeedsLCSSAPhis;
657 NeedsLCSSAPhis.push_back(cast<Instruction>(Invariant));
658 formLCSSAForInstructions(NeedsLCSSAPhis, *DT, *LI, SE);
659 LLVM_DEBUG(dbgs() << " INDVARS: Replacement breaks LCSSA form"
660 << " inserting LCSSA Phis" << '\n');
661 }
662 ++NumFoldedUser;
663 Changed = true;
664 DeadInsts.emplace_back(I);
665 return true;
666}
667
668/// Eliminate redundant type cast between integer and float.
669bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
670 if (UseInst->getOpcode() != CastInst::SIToFP &&
671 UseInst->getOpcode() != CastInst::UIToFP)
672 return false;
673
674 Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0));
675 // Get the symbolic expression for this instruction.
676 const SCEV *IV = SE->getSCEV(IVOperand);
677 int MaskBits;
678 if (UseInst->getOpcode() == CastInst::SIToFP)
679 MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits();
680 else
681 MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits();
682 int DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
683 if (MaskBits <= DestNumSigBits) {
684 for (User *U : UseInst->users()) {
685 // Match for fptosi/fptoui of sitofp and with same type.
686 auto *CI = dyn_cast<CastInst>(U);
687 if (!CI)
688 continue;
689
690 CastInst::CastOps Opcode = CI->getOpcode();
691 if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI)
692 continue;
693
694 Value *Conv = nullptr;
695 if (IVOperand->getType() != CI->getType()) {
696 IRBuilder<> Builder(CI);
697 StringRef Name = IVOperand->getName();
698 // To match InstCombine logic, we only need sext if both fptosi and
699 // sitofp are used. If one of them is unsigned, then we can use zext.
700 if (SE->getTypeSizeInBits(IVOperand->getType()) >
701 SE->getTypeSizeInBits(CI->getType())) {
702 Conv = Builder.CreateTrunc(IVOperand, CI->getType(), Name + ".trunc");
703 } else if (Opcode == CastInst::FPToUI ||
704 UseInst->getOpcode() == CastInst::UIToFP) {
705 Conv = Builder.CreateZExt(IVOperand, CI->getType(), Name + ".zext");
706 } else {
707 Conv = Builder.CreateSExt(IVOperand, CI->getType(), Name + ".sext");
708 }
709 } else
710 Conv = IVOperand;
711
712 CI->replaceAllUsesWith(Conv);
713 DeadInsts.push_back(CI);
714 LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
715 << " with: " << *Conv << '\n');
716
717 ++NumFoldedUser;
718 Changed = true;
719 }
720 }
721
722 return Changed;
723}
724
725/// Eliminate any operation that SCEV can prove is an identity function.
726bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
727 Instruction *IVOperand) {
728 if (!SE->isSCEVable(UseInst->getType()) ||
729 UseInst->getType() != IVOperand->getType())
730 return false;
731
732 const SCEV *UseSCEV = SE->getSCEV(UseInst);
733 if (UseSCEV != SE->getSCEV(IVOperand))
734 return false;
735
736 // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
737 // dominator tree, even if X is an operand to Y. For instance, in
738 //
739 // %iv = phi i32 {0,+,1}
740 // br %cond, label %left, label %merge
741 //
742 // left:
743 // %X = add i32 %iv, 0
744 // br label %merge
745 //
746 // merge:
747 // %M = phi (%X, %iv)
748 //
749 // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
750 // %M.replaceAllUsesWith(%X) would be incorrect.
751
752 if (isa<PHINode>(UseInst))
753 // If UseInst is not a PHI node then we know that IVOperand dominates
754 // UseInst directly from the legality of SSA.
755 if (!DT || !DT->dominates(IVOperand, UseInst))
756 return false;
757
758 if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
759 return false;
760
761 // Make sure the operand is not more poisonous than the instruction.
762 if (!impliesPoison(IVOperand, UseInst)) {
763 SmallVector<Instruction *> DropPoisonGeneratingInsts;
764 if (!SE->canReuseInstruction(UseSCEV, IVOperand, DropPoisonGeneratingInsts))
765 return false;
766
767 for (Instruction *I : DropPoisonGeneratingInsts)
768 I->dropPoisonGeneratingFlagsAndMetadata();
769 }
770
771 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
772
773 SE->forgetValue(UseInst);
774 UseInst->replaceAllUsesWith(IVOperand);
775 ++NumElimIdentity;
776 Changed = true;
777 DeadInsts.emplace_back(UseInst);
778 return true;
779}
780
781bool SimplifyIndvar::strengthenBinaryOp(BinaryOperator *BO,
782 Instruction *IVOperand) {
783 return (isa<OverflowingBinaryOperator>(BO) &&
784 strengthenOverflowingOperation(BO, IVOperand)) ||
785 (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand));
786}
787
788/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
789/// unsigned-overflow. Returns true if anything changed, false otherwise.
790bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
791 Instruction *IVOperand) {
792 auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp(
793 cast<OverflowingBinaryOperator>(BO));
794
795 if (!Flags)
796 return false;
797
802
803 // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
804 // flags on addrecs while performing zero/sign extensions. We could call
805 // forgetValue() here to make sure those flags also propagate to any other
806 // SCEV expressions based on the addrec. However, this can have pathological
807 // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
808 return true;
809}
810
811/// Annotate the Shr in (X << IVOperand) >> C as exact using the
812/// information from the IV's range. Returns true if anything changed, false
813/// otherwise.
814bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
815 Instruction *IVOperand) {
816 if (BO->getOpcode() == Instruction::Shl) {
817 bool Changed = false;
818 ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
819 for (auto *U : BO->users()) {
820 const APInt *C;
821 if (match(U,
822 m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
823 match(U,
824 m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
825 BinaryOperator *Shr = cast<BinaryOperator>(U);
826 if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
827 Shr->setIsExact(true);
828 Changed = true;
829 }
830 }
831 }
832 return Changed;
833 }
834
835 return false;
836}
837
838/// Add all uses of Def to the current IV's worklist.
839static void pushIVUsers(
840 Instruction *Def, Loop *L,
842 SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
843
844 for (User *U : Def->users()) {
845 Instruction *UI = cast<Instruction>(U);
846
847 // Avoid infinite or exponential worklist processing.
848 // Also ensure unique worklist users.
849 // If Def is a LoopPhi, it may not be in the Simplified set, so check for
850 // self edges first.
851 if (UI == Def)
852 continue;
853
854 // Only change the current Loop, do not change the other parts (e.g. other
855 // Loops).
856 if (!L->contains(UI))
857 continue;
858
859 // Do not push the same instruction more than once.
860 if (!Simplified.insert(UI).second)
861 continue;
862
863 SimpleIVUsers.push_back(std::make_pair(UI, Def));
864 }
865}
866
867/// Return true if this instruction generates a simple SCEV
868/// expression in terms of that IV.
869///
870/// This is similar to IVUsers' isInteresting() but processes each instruction
871/// non-recursively when the operand is already known to be a simpleIVUser.
872///
873static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
874 if (!SE->isSCEVable(I->getType()))
875 return false;
876
877 // Get the symbolic expression for this instruction.
878 const SCEV *S = SE->getSCEV(I);
879
880 // Only consider affine recurrences.
881 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
882 if (AR && AR->getLoop() == L)
883 return true;
884
885 return false;
886}
887
888/// Iteratively perform simplification on a worklist of users
889/// of the specified induction variable. Each successive simplification may push
890/// more users which may themselves be candidates for simplification.
891///
892/// This algorithm does not require IVUsers analysis. Instead, it simplifies
893/// instructions in-place during analysis. Rather than rewriting induction
894/// variables bottom-up from their users, it transforms a chain of IVUsers
895/// top-down, updating the IR only when it encounters a clear optimization
896/// opportunity.
897///
898/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
899///
900void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
901 if (!SE->isSCEVable(CurrIV->getType()))
902 return;
903
904 // Instructions processed by SimplifyIndvar for CurrIV.
906
907 // Use-def pairs if IV users waiting to be processed for CurrIV.
909
910 // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
911 // called multiple times for the same LoopPhi. This is the proper thing to
912 // do for loop header phis that use each other.
913 pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers);
914
915 while (!SimpleIVUsers.empty()) {
916 std::pair<Instruction*, Instruction*> UseOper =
917 SimpleIVUsers.pop_back_val();
918 Instruction *UseInst = UseOper.first;
919
920 // If a user of the IndVar is trivially dead, we prefer just to mark it dead
921 // rather than try to do some complex analysis or transformation (such as
922 // widening) basing on it.
923 // TODO: Propagate TLI and pass it here to handle more cases.
924 if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) {
925 DeadInsts.emplace_back(UseInst);
926 continue;
927 }
928
929 // Bypass back edges to avoid extra work.
930 if (UseInst == CurrIV) continue;
931
932 // Try to replace UseInst with a loop invariant before any other
933 // simplifications.
934 if (replaceIVUserWithLoopInvariant(UseInst))
935 continue;
936
937 // Go further for the bitcast 'prtoint ptr to i64' or if the cast is done
938 // by truncation
939 if ((isa<PtrToIntInst>(UseInst)) || (isa<TruncInst>(UseInst)))
940 for (Use &U : UseInst->uses()) {
941 Instruction *User = cast<Instruction>(U.getUser());
942 if (replaceIVUserWithLoopInvariant(User))
943 break; // done replacing
944 }
945
946 Instruction *IVOperand = UseOper.second;
947 for (unsigned N = 0; IVOperand; ++N) {
948 assert(N <= Simplified.size() && "runaway iteration");
949 (void) N;
950
951 Value *NewOper = foldIVUser(UseInst, IVOperand);
952 if (!NewOper)
953 break; // done folding
954 IVOperand = dyn_cast<Instruction>(NewOper);
955 }
956 if (!IVOperand)
957 continue;
958
959 if (eliminateIVUser(UseInst, IVOperand)) {
960 pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
961 continue;
962 }
963
964 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
965 if (strengthenBinaryOp(BO, IVOperand)) {
966 // re-queue uses of the now modified binary operator and fall
967 // through to the checks that remain.
968 pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
969 }
970 }
971
972 // Try to use integer induction for FPToSI of float induction directly.
973 if (replaceFloatIVWithIntegerIV(UseInst)) {
974 // Re-queue the potentially new direct uses of IVOperand.
975 pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
976 continue;
977 }
978
979 CastInst *Cast = dyn_cast<CastInst>(UseInst);
980 if (V && Cast) {
981 V->visitCast(Cast);
982 continue;
983 }
984 if (isSimpleIVUser(UseInst, L, SE)) {
985 pushIVUsers(UseInst, L, Simplified, SimpleIVUsers);
986 }
987 }
988}
989
990namespace llvm {
991
993
994/// Simplify instructions that use this induction variable
995/// by using ScalarEvolution to analyze the IV's recurrence.
997 LoopInfo *LI, const TargetTransformInfo *TTI,
999 SCEVExpander &Rewriter, IVVisitor *V) {
1000 SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
1001 Rewriter, Dead);
1002 SIV.simplifyUsers(CurrIV, V);
1003 return SIV.hasChanged();
1004}
1005
1006/// Simplify users of induction variables within this
1007/// loop. This does not actually change or add IVs.
1009 LoopInfo *LI, const TargetTransformInfo *TTI,
1011 SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars");
1012#ifndef NDEBUG
1013 Rewriter.setDebugType(DEBUG_TYPE);
1014#endif
1015 bool Changed = false;
1016 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1017 Changed |=
1018 simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
1019 }
1020 return Changed;
1021}
1022
1023} // namespace llvm
1024
1025namespace {
1026//===----------------------------------------------------------------------===//
1027// Widen Induction Variables - Extend the width of an IV to cover its
1028// widest uses.
1029//===----------------------------------------------------------------------===//
1030
1031class WidenIV {
1032 // Parameters
1033 PHINode *OrigPhi;
1034 Type *WideType;
1035
1036 // Context
1037 LoopInfo *LI;
1038 Loop *L;
1039 ScalarEvolution *SE;
1040 DominatorTree *DT;
1041
1042 // Does the module have any calls to the llvm.experimental.guard intrinsic
1043 // at all? If not we can avoid scanning instructions looking for guards.
1044 bool HasGuards;
1045
1046 bool UsePostIncrementRanges;
1047
1048 // Statistics
1049 unsigned NumElimExt = 0;
1050 unsigned NumWidened = 0;
1051
1052 // Result
1053 PHINode *WidePhi = nullptr;
1054 Instruction *WideInc = nullptr;
1055 const SCEV *WideIncExpr = nullptr;
1057
1059
1060 enum class ExtendKind { Zero, Sign, Unknown };
1061
1062 // A map tracking the kind of extension used to widen each narrow IV
1063 // and narrow IV user.
1064 // Key: pointer to a narrow IV or IV user.
1065 // Value: the kind of extension used to widen this Instruction.
1066 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
1067
1068 using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
1069
1070 // A map with control-dependent ranges for post increment IV uses. The key is
1071 // a pair of IV def and a use of this def denoting the context. The value is
1072 // a ConstantRange representing possible values of the def at the given
1073 // context.
1074 DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
1075
1076 std::optional<ConstantRange> getPostIncRangeInfo(Value *Def,
1077 Instruction *UseI) {
1078 DefUserPair Key(Def, UseI);
1079 auto It = PostIncRangeInfos.find(Key);
1080 return It == PostIncRangeInfos.end()
1081 ? std::optional<ConstantRange>(std::nullopt)
1082 : std::optional<ConstantRange>(It->second);
1083 }
1084
1085 void calculatePostIncRanges(PHINode *OrigPhi);
1086 void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
1087
1088 void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
1089 DefUserPair Key(Def, UseI);
1090 auto It = PostIncRangeInfos.find(Key);
1091 if (It == PostIncRangeInfos.end())
1092 PostIncRangeInfos.insert({Key, R});
1093 else
1094 It->second = R.intersectWith(It->second);
1095 }
1096
1097public:
1098 /// Record a link in the Narrow IV def-use chain along with the WideIV that
1099 /// computes the same value as the Narrow IV def. This avoids caching Use*
1100 /// pointers.
1101 struct NarrowIVDefUse {
1102 Instruction *NarrowDef = nullptr;
1103 Instruction *NarrowUse = nullptr;
1104 Instruction *WideDef = nullptr;
1105
1106 // True if the narrow def is never negative. Tracking this information lets
1107 // us use a sign extension instead of a zero extension or vice versa, when
1108 // profitable and legal.
1109 bool NeverNegative = false;
1110
1111 NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
1112 bool NeverNegative)
1113 : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
1114 NeverNegative(NeverNegative) {}
1115 };
1116
1117 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1119 bool HasGuards, bool UsePostIncrementRanges = true);
1120
1121 PHINode *createWideIV(SCEVExpander &Rewriter);
1122
1123 unsigned getNumElimExt() { return NumElimExt; };
1124 unsigned getNumWidened() { return NumWidened; };
1125
1126protected:
1127 Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1128 Instruction *Use);
1129
1130 Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1131 Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1132 const SCEVAddRecExpr *WideAR);
1133 Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1134
1135 ExtendKind getExtendKind(Instruction *I);
1136
1137 using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1138
1139 WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1140
1141 WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1142
1143 const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1144 unsigned OpCode) const;
1145
1146 Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter,
1147 PHINode *OrigPhi, PHINode *WidePhi);
1148
1149 bool widenLoopCompare(NarrowIVDefUse DU);
1150 bool widenWithVariantUse(NarrowIVDefUse DU);
1151
1152 void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1153
1154private:
1155 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
1156};
1157} // namespace
1158
1159/// Determine the insertion point for this user. By default, insert immediately
1160/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
1161/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
1162/// common dominator for the incoming blocks. A nullptr can be returned if no
1163/// viable location is found: it may happen if User is a PHI and Def only comes
1164/// to this PHI from unreachable blocks.
1166 DominatorTree *DT, LoopInfo *LI) {
1167 PHINode *PHI = dyn_cast<PHINode>(User);
1168 if (!PHI)
1169 return User;
1170
1171 Instruction *InsertPt = nullptr;
1172 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
1173 if (PHI->getIncomingValue(i) != Def)
1174 continue;
1175
1176 BasicBlock *InsertBB = PHI->getIncomingBlock(i);
1177
1178 if (!DT->isReachableFromEntry(InsertBB))
1179 continue;
1180
1181 if (!InsertPt) {
1182 InsertPt = InsertBB->getTerminator();
1183 continue;
1184 }
1185 InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
1186 InsertPt = InsertBB->getTerminator();
1187 }
1188
1189 // If we have skipped all inputs, it means that Def only comes to Phi from
1190 // unreachable blocks.
1191 if (!InsertPt)
1192 return nullptr;
1193
1194 auto *DefI = dyn_cast<Instruction>(Def);
1195 if (!DefI)
1196 return InsertPt;
1197
1198 assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
1199
1200 auto *L = LI->getLoopFor(DefI->getParent());
1201 assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
1202
1203 for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
1204 if (LI->getLoopFor(DTN->getBlock()) == L)
1205 return DTN->getBlock()->getTerminator();
1206
1207 llvm_unreachable("DefI dominates InsertPt!");
1208}
1209
1210WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1212 bool HasGuards, bool UsePostIncrementRanges)
1213 : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1214 L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1215 HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges),
1216 DeadInsts(DI) {
1217 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1218 ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero;
1219}
1220
1221Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1222 bool IsSigned, Instruction *Use) {
1223 // Set the debug location and conservative insertion point.
1224 IRBuilder<> Builder(Use);
1225 // Hoist the insertion point into loop preheaders as far as possible.
1226 for (const Loop *L = LI->getLoopFor(Use->getParent());
1227 L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1228 L = L->getParentLoop())
1229 Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1230
1231 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1232 Builder.CreateZExt(NarrowOper, WideType);
1233}
1234
1235/// Instantiate a wide operation to replace a narrow operation. This only needs
1236/// to handle operations that can evaluation to SCEVAddRec. It can safely return
1237/// 0 for any operation we decide not to clone.
1238Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU,
1239 const SCEVAddRecExpr *WideAR) {
1240 unsigned Opcode = DU.NarrowUse->getOpcode();
1241 switch (Opcode) {
1242 default:
1243 return nullptr;
1244 case Instruction::Add:
1245 case Instruction::Mul:
1246 case Instruction::UDiv:
1247 case Instruction::Sub:
1248 return cloneArithmeticIVUser(DU, WideAR);
1249
1250 case Instruction::And:
1251 case Instruction::Or:
1252 case Instruction::Xor:
1253 case Instruction::Shl:
1254 case Instruction::LShr:
1255 case Instruction::AShr:
1256 return cloneBitwiseIVUser(DU);
1257 }
1258}
1259
1260Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) {
1261 Instruction *NarrowUse = DU.NarrowUse;
1262 Instruction *NarrowDef = DU.NarrowDef;
1263 Instruction *WideDef = DU.WideDef;
1264
1265 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1266
1267 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1268 // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1269 // invariant and will be folded or hoisted. If it actually comes from a
1270 // widened IV, it should be removed during a future call to widenIVUse.
1271 bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign;
1272 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1273 ? WideDef
1274 : createExtendInst(NarrowUse->getOperand(0), WideType,
1275 IsSigned, NarrowUse);
1276 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1277 ? WideDef
1278 : createExtendInst(NarrowUse->getOperand(1), WideType,
1279 IsSigned, NarrowUse);
1280
1281 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1282 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1283 NarrowBO->getName());
1284 IRBuilder<> Builder(NarrowUse);
1285 Builder.Insert(WideBO);
1286 WideBO->copyIRFlags(NarrowBO);
1287 return WideBO;
1288}
1289
1290Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU,
1291 const SCEVAddRecExpr *WideAR) {
1292 Instruction *NarrowUse = DU.NarrowUse;
1293 Instruction *NarrowDef = DU.NarrowDef;
1294 Instruction *WideDef = DU.WideDef;
1295
1296 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1297
1298 unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1299
1300 // We're trying to find X such that
1301 //
1302 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1303 //
1304 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1305 // and check using SCEV if any of them are correct.
1306
1307 // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1308 // correct solution to X.
1309 auto GuessNonIVOperand = [&](bool SignExt) {
1310 const SCEV *WideLHS;
1311 const SCEV *WideRHS;
1312
1313 auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1314 if (SignExt)
1315 return SE->getSignExtendExpr(S, Ty);
1316 return SE->getZeroExtendExpr(S, Ty);
1317 };
1318
1319 if (IVOpIdx == 0) {
1320 WideLHS = SE->getSCEV(WideDef);
1321 const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1322 WideRHS = GetExtend(NarrowRHS, WideType);
1323 } else {
1324 const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1325 WideLHS = GetExtend(NarrowLHS, WideType);
1326 WideRHS = SE->getSCEV(WideDef);
1327 }
1328
1329 // WideUse is "WideDef `op.wide` X" as described in the comment.
1330 const SCEV *WideUse =
1331 getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode());
1332
1333 return WideUse == WideAR;
1334 };
1335
1336 bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign;
1337 if (!GuessNonIVOperand(SignExtend)) {
1338 SignExtend = !SignExtend;
1339 if (!GuessNonIVOperand(SignExtend))
1340 return nullptr;
1341 }
1342
1343 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1344 ? WideDef
1345 : createExtendInst(NarrowUse->getOperand(0), WideType,
1346 SignExtend, NarrowUse);
1347 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1348 ? WideDef
1349 : createExtendInst(NarrowUse->getOperand(1), WideType,
1350 SignExtend, NarrowUse);
1351
1352 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1353 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1354 NarrowBO->getName());
1355
1356 IRBuilder<> Builder(NarrowUse);
1357 Builder.Insert(WideBO);
1358 WideBO->copyIRFlags(NarrowBO);
1359 return WideBO;
1360}
1361
1362WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1363 auto It = ExtendKindMap.find(I);
1364 assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1365 return It->second;
1366}
1367
1368const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1369 unsigned OpCode) const {
1370 switch (OpCode) {
1371 case Instruction::Add:
1372 return SE->getAddExpr(LHS, RHS);
1373 case Instruction::Sub:
1374 return SE->getMinusSCEV(LHS, RHS);
1375 case Instruction::Mul:
1376 return SE->getMulExpr(LHS, RHS);
1377 case Instruction::UDiv:
1378 return SE->getUDivExpr(LHS, RHS);
1379 default:
1380 llvm_unreachable("Unsupported opcode.");
1381 };
1382}
1383
1384namespace {
1385
// Represents a interesting integer binary operation for
// getExtendedOperandRecurrence. This may be a shl that is being treated as a
// multiply or a 'or disjoint' that is being treated as 'add nsw nuw'.
struct BinaryOp {
  unsigned Opcode;                 // The IR opcode this operation behaves as.
  std::array<Value *, 2> Operands; // The two operand values.
  bool IsNSW = false;              // No-signed-wrap known to hold.
  bool IsNUW = false;              // No-unsigned-wrap known to hold.

  /// Build from an existing binary instruction, inheriting its wrap flags
  /// when it is an overflowing binary operator.
  explicit BinaryOp(Instruction *Op)
      : Opcode(Op->getOpcode()),
        Operands({Op->getOperand(0), Op->getOperand(1)}) {
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
      IsNSW = OBO->hasNoSignedWrap();
      IsNUW = OBO->hasNoUnsignedWrap();
    }
  }

  /// Build a synthetic operation with an explicit opcode, operands and flags
  /// (used when reinterpreting shl as mul or 'or disjoint' as add).
  explicit BinaryOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
                    bool IsNSW = false, bool IsNUW = false)
      : Opcode(Opcode), Operands({LHS, RHS}), IsNSW(IsNSW), IsNUW(IsNUW) {}
};
1408
1409} // end anonymous namespace
1410
1411static std::optional<BinaryOp> matchBinaryOp(Instruction *Op) {
1412 switch (Op->getOpcode()) {
1413 case Instruction::Add:
1414 case Instruction::Sub:
1415 case Instruction::Mul:
1416 return BinaryOp(Op);
1417 case Instruction::Or: {
1418 // Convert or disjoint into add nuw nsw.
1419 if (cast<PossiblyDisjointInst>(Op)->isDisjoint())
1420 return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1),
1421 /*IsNSW=*/true, /*IsNUW=*/true);
1422 break;
1423 }
1424 case Instruction::Shl: {
1425 if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
1426 unsigned BitWidth = cast<IntegerType>(SA->getType())->getBitWidth();
1427
1428 // If the shift count is not less than the bitwidth, the result of
1429 // the shift is undefined. Don't try to analyze it, because the
1430 // resolution chosen here may differ from the resolution chosen in
1431 // other parts of the compiler.
1432 if (SA->getValue().ult(BitWidth)) {
1433 // We can safely preserve the nuw flag in all cases. It's also safe to
1434 // turn a nuw nsw shl into a nuw nsw mul. However, nsw in isolation
1435 // requires special handling. It can be preserved as long as we're not
1436 // left shifting by bitwidth - 1.
1437 bool IsNUW = Op->hasNoUnsignedWrap();
1438 bool IsNSW = Op->hasNoSignedWrap() &&
1439 (IsNUW || SA->getValue().ult(BitWidth - 1));
1440
1441 ConstantInt *X =
1442 ConstantInt::get(Op->getContext(),
1443 APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
1444 return BinaryOp(Instruction::Mul, Op->getOperand(0), X, IsNSW, IsNUW);
1445 }
1446 }
1447
1448 break;
1449 }
1450 }
1451
1452 return std::nullopt;
1453}
1454
1455/// No-wrap operations can transfer sign extension of their result to their
1456/// operands. Generate the SCEV value for the widened operation without
1457/// actually modifying the IR yet. If the expression after extending the
1458/// operands is an AddRec for this loop, return the AddRec and the kind of
1459/// extension used.
1460WidenIV::WidenedRecTy
1461WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
1462 auto Op = matchBinaryOp(DU.NarrowUse);
1463 if (!Op)
1464 return {nullptr, ExtendKind::Unknown};
1465
1466 assert((Op->Opcode == Instruction::Add || Op->Opcode == Instruction::Sub ||
1467 Op->Opcode == Instruction::Mul) &&
1468 "Unexpected opcode");
1469
1470 // One operand (NarrowDef) has already been extended to WideDef. Now determine
1471 // if extending the other will lead to a recurrence.
1472 const unsigned ExtendOperIdx = Op->Operands[0] == DU.NarrowDef ? 1 : 0;
1473 assert(Op->Operands[1 - ExtendOperIdx] == DU.NarrowDef && "bad DU");
1474
1475 ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1476 if (!(ExtKind == ExtendKind::Sign && Op->IsNSW) &&
1477 !(ExtKind == ExtendKind::Zero && Op->IsNUW)) {
1478 ExtKind = ExtendKind::Unknown;
1479
1480 // For a non-negative NarrowDef, we can choose either type of
1481 // extension. We want to use the current extend kind if legal
1482 // (see above), and we only hit this code if we need to check
1483 // the opposite case.
1484 if (DU.NeverNegative) {
1485 if (Op->IsNSW) {
1486 ExtKind = ExtendKind::Sign;
1487 } else if (Op->IsNUW) {
1488 ExtKind = ExtendKind::Zero;
1489 }
1490 }
1491 }
1492
1493 const SCEV *ExtendOperExpr = SE->getSCEV(Op->Operands[ExtendOperIdx]);
1494 if (ExtKind == ExtendKind::Sign)
1495 ExtendOperExpr = SE->getSignExtendExpr(ExtendOperExpr, WideType);
1496 else if (ExtKind == ExtendKind::Zero)
1497 ExtendOperExpr = SE->getZeroExtendExpr(ExtendOperExpr, WideType);
1498 else
1499 return {nullptr, ExtendKind::Unknown};
1500
1501 // When creating this SCEV expr, don't apply the current operations NSW or NUW
1502 // flags. This instruction may be guarded by control flow that the no-wrap
1503 // behavior depends on. Non-control-equivalent instructions can be mapped to
1504 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1505 // semantics to those operations.
1506 const SCEV *lhs = SE->getSCEV(DU.WideDef);
1507 const SCEV *rhs = ExtendOperExpr;
1508
1509 // Let's swap operands to the initial order for the case of non-commutative
1510 // operations, like SUB. See PR21014.
1511 if (ExtendOperIdx == 0)
1512 std::swap(lhs, rhs);
1513 const SCEVAddRecExpr *AddRec =
1514 dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, Op->Opcode));
1515
1516 if (!AddRec || AddRec->getLoop() != L)
1517 return {nullptr, ExtendKind::Unknown};
1518
1519 return {AddRec, ExtKind};
1520}
1521
1522/// Is this instruction potentially interesting for further simplification after
1523/// widening it's type? In other words, can the extend be safely hoisted out of
1524/// the loop with SCEV reducing the value to a recurrence on the same loop. If
1525/// so, return the extended recurrence and the kind of extension used. Otherwise
1526/// return {nullptr, ExtendKind::Unknown}.
1527WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
1528 if (!DU.NarrowUse->getType()->isIntegerTy())
1529 return {nullptr, ExtendKind::Unknown};
1530
1531 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1532 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1533 SE->getTypeSizeInBits(WideType)) {
1534 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1535 // index. So don't follow this use.
1536 return {nullptr, ExtendKind::Unknown};
1537 }
1538
1539 const SCEV *WideExpr;
1540 ExtendKind ExtKind;
1541 if (DU.NeverNegative) {
1542 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1543 if (isa<SCEVAddRecExpr>(WideExpr))
1544 ExtKind = ExtendKind::Sign;
1545 else {
1546 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1547 ExtKind = ExtendKind::Zero;
1548 }
1549 } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) {
1550 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1551 ExtKind = ExtendKind::Sign;
1552 } else {
1553 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1554 ExtKind = ExtendKind::Zero;
1555 }
1556 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1557 if (!AddRec || AddRec->getLoop() != L)
1558 return {nullptr, ExtendKind::Unknown};
1559 return {AddRec, ExtKind};
1560}
1561
1562/// This IV user cannot be widened. Replace this use of the original narrow IV
1563/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1564static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT,
1565 LoopInfo *LI) {
1566 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1567 if (!InsertPt)
1568 return;
1569 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1570 << *DU.NarrowUse << "\n");
1571 IRBuilder<> Builder(InsertPt);
1572 Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1573 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1574}
1575
1576/// If the narrow use is a compare instruction, then widen the compare
1577// (and possibly the other operand). The extend operation is hoisted into the
1578// loop preheader as far as possible.
1579bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
1580 ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1581 if (!Cmp)
1582 return false;
1583
1584 // We can legally widen the comparison in the following two cases:
1585 //
1586 // - The signedness of the IV extension and comparison match
1587 //
1588 // - The narrow IV is always positive (and thus its sign extension is equal
1589 // to its zero extension). For instance, let's say we're zero extending
1590 // %narrow for the following use
1591 //
1592 // icmp slt i32 %narrow, %val ... (A)
1593 //
1594 // and %narrow is always positive. Then
1595 //
1596 // (A) == icmp slt i32 sext(%narrow), sext(%val)
1597 // == icmp slt i32 zext(%narrow), sext(%val)
1598 bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
1599 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1600 return false;
1601
1602 Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1603 unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1604 unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1605 assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1606
1607 // Widen the compare instruction.
1608 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1609
1610 // Widen the other operand of the compare, if necessary.
1611 if (CastWidth < IVWidth) {
1612 Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1613 DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1614 }
1615 return true;
1616}
1617
1618// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
1619// will not work when:
1620// 1) SCEV traces back to an instruction inside the loop that SCEV can not
1621// expand, eg. add %indvar, (load %addr)
1622// 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
1623// While SCEV fails to avoid trunc, we can still try to use instruction
1624// combining approach to prove trunc is not required. This can be further
1625// extended with other instruction combining checks, but for now we handle the
1626// following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext")
1627//
1628// Src:
1629// %c = sub nsw %b, %indvar
1630// %d = sext %c to i64
1631// Dst:
1632// %indvar.ext1 = sext %indvar to i64
1633// %m = sext %b to i64
1634// %d = sub nsw i64 %m, %indvar.ext1
1635// Therefore, as long as the result of add/sub/mul is extended to wide type, no
1636// trunc is required regardless of how %b is generated. This pattern is common
1637// when calculating address in 64 bit architecture
1638bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
1639 Instruction *NarrowUse = DU.NarrowUse;
1640 Instruction *NarrowDef = DU.NarrowDef;
1641 Instruction *WideDef = DU.WideDef;
1642
1643 // Handle the common case of add<nsw/nuw>
1644 const unsigned OpCode = NarrowUse->getOpcode();
1645 // Only Add/Sub/Mul instructions are supported.
1646 if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1647 OpCode != Instruction::Mul)
1648 return false;
1649
1650 // The operand that is not defined by NarrowDef of DU. Let's call it the
1651 // other operand.
1652 assert((NarrowUse->getOperand(0) == NarrowDef ||
1653 NarrowUse->getOperand(1) == NarrowDef) &&
1654 "bad DU");
1655
1656 const OverflowingBinaryOperator *OBO =
1657 cast<OverflowingBinaryOperator>(NarrowUse);
1658 ExtendKind ExtKind = getExtendKind(NarrowDef);
1659 bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap();
1660 bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap();
1661 auto AnotherOpExtKind = ExtKind;
1662
1663 // Check that all uses are either:
1664 // - narrow def (in case of we are widening the IV increment);
1665 // - single-input LCSSA Phis;
1666 // - comparison of the chosen type;
1667 // - extend of the chosen type (raison d'etre).
1669 SmallVector<PHINode *, 4> LCSSAPhiUsers;
1671 for (Use &U : NarrowUse->uses()) {
1672 Instruction *User = cast<Instruction>(U.getUser());
1673 if (User == NarrowDef)
1674 continue;
1675 if (!L->contains(User)) {
1676 auto *LCSSAPhi = cast<PHINode>(User);
1677 // Make sure there is only 1 input, so that we don't have to split
1678 // critical edges.
1679 if (LCSSAPhi->getNumOperands() != 1)
1680 return false;
1681 LCSSAPhiUsers.push_back(LCSSAPhi);
1682 continue;
1683 }
1684 if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
1685 auto Pred = ICmp->getPredicate();
1686 // We have 3 types of predicates: signed, unsigned and equality
1687 // predicates. For equality, it's legal to widen icmp for either sign and
1688 // zero extend. For sign extend, we can also do so for signed predicates,
1689 // likeweise for zero extend we can widen icmp for unsigned predicates.
1690 if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred))
1691 return false;
1692 if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred))
1693 return false;
1694 ICmpUsers.push_back(ICmp);
1695 continue;
1696 }
1697 if (ExtKind == ExtendKind::Sign)
1698 User = dyn_cast<SExtInst>(User);
1699 else
1700 User = dyn_cast<ZExtInst>(User);
1701 if (!User || User->getType() != WideType)
1702 return false;
1703 ExtUsers.push_back(User);
1704 }
1705 if (ExtUsers.empty()) {
1706 DeadInsts.emplace_back(NarrowUse);
1707 return true;
1708 }
1709
1710 // We'll prove some facts that should be true in the context of ext users. If
1711 // there is no users, we are done now. If there are some, pick their common
1712 // dominator as context.
1713 const Instruction *CtxI = findCommonDominator(ExtUsers, *DT);
1714
1715 if (!CanSignExtend && !CanZeroExtend) {
1716 // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
1717 // will most likely not see it. Let's try to prove it.
1718 if (OpCode != Instruction::Add)
1719 return false;
1720 if (ExtKind != ExtendKind::Zero)
1721 return false;
1722 const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
1723 const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
1724 // TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
1725 if (NarrowUse->getOperand(0) != NarrowDef)
1726 return false;
1727 if (!SE->isKnownNegative(RHS))
1728 return false;
1729 bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS,
1730 SE->getNegativeSCEV(RHS), CtxI);
1731 if (!ProvedSubNUW)
1732 return false;
1733 // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
1734 // neg(zext(neg(op))), which is basically sext(op).
1735 AnotherOpExtKind = ExtendKind::Sign;
1736 }
1737
1738 // Verifying that Defining operand is an AddRec
1739 const SCEV *Op1 = SE->getSCEV(WideDef);
1740 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1741 if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1742 return false;
1743
1744 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1745
1746 // Generating a widening use instruction.
1747 Value *LHS =
1748 (NarrowUse->getOperand(0) == NarrowDef)
1749 ? WideDef
1750 : createExtendInst(NarrowUse->getOperand(0), WideType,
1751 AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
1752 Value *RHS =
1753 (NarrowUse->getOperand(1) == NarrowDef)
1754 ? WideDef
1755 : createExtendInst(NarrowUse->getOperand(1), WideType,
1756 AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
1757
1758 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1759 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1760 NarrowBO->getName());
1761 IRBuilder<> Builder(NarrowUse);
1762 Builder.Insert(WideBO);
1763 WideBO->copyIRFlags(NarrowBO);
1764 ExtendKindMap[NarrowUse] = ExtKind;
1765
1766 for (Instruction *User : ExtUsers) {
1767 assert(User->getType() == WideType && "Checked before!");
1768 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1769 << *WideBO << "\n");
1770 ++NumElimExt;
1771 User->replaceAllUsesWith(WideBO);
1772 DeadInsts.emplace_back(User);
1773 }
1774
1775 for (PHINode *User : LCSSAPhiUsers) {
1776 assert(User->getNumOperands() == 1 && "Checked before!");
1777 Builder.SetInsertPoint(User);
1778 auto *WidePN =
1779 Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide");
1780 BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor();
1781 assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
1782 "Not a LCSSA Phi?");
1783 WidePN->addIncoming(WideBO, LoopExitingBlock);
1784 Builder.SetInsertPoint(User->getParent(),
1785 User->getParent()->getFirstInsertionPt());
1786 auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
1787 User->replaceAllUsesWith(TruncPN);
1788 DeadInsts.emplace_back(User);
1789 }
1790
1791 for (ICmpInst *User : ICmpUsers) {
1792 Builder.SetInsertPoint(User);
1793 auto ExtendedOp = [&](Value * V)->Value * {
1794 if (V == NarrowUse)
1795 return WideBO;
1796 if (ExtKind == ExtendKind::Zero)
1797 return Builder.CreateZExt(V, WideBO->getType());
1798 else
1799 return Builder.CreateSExt(V, WideBO->getType());
1800 };
1801 auto Pred = User->getPredicate();
1802 auto *LHS = ExtendedOp(User->getOperand(0));
1803 auto *RHS = ExtendedOp(User->getOperand(1));
1804 auto *WideCmp =
1805 Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
1806 User->replaceAllUsesWith(WideCmp);
1807 DeadInsts.emplace_back(User);
1808 }
1809
1810 return true;
1811}
1812
1813/// Determine whether an individual user of the narrow IV can be widened. If so,
1814/// return the wide clone of the user.
1815Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU,
1816 SCEVExpander &Rewriter, PHINode *OrigPhi,
1817 PHINode *WidePhi) {
1818 assert(ExtendKindMap.count(DU.NarrowDef) &&
1819 "Should already know the kind of extension used to widen NarrowDef");
1820
1821 // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1822 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1823 if (LI->getLoopFor(UsePhi->getParent()) != L) {
1824 // For LCSSA phis, sink the truncate outside the loop.
1825 // After SimplifyCFG most loop exit targets have a single predecessor.
1826 // Otherwise fall back to a truncate within the loop.
1827 if (UsePhi->getNumOperands() != 1)
1828 truncateIVUse(DU, DT, LI);
1829 else {
1830 // Widening the PHI requires us to insert a trunc. The logical place
1831 // for this trunc is in the same BB as the PHI. This is not possible if
1832 // the BB is terminated by a catchswitch.
1833 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1834 return nullptr;
1835
1836 PHINode *WidePhi =
1837 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1838 UsePhi->getIterator());
1839 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1840 BasicBlock *WidePhiBB = WidePhi->getParent();
1841 IRBuilder<> Builder(WidePhiBB, WidePhiBB->getFirstInsertionPt());
1842 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1843 UsePhi->replaceAllUsesWith(Trunc);
1844 DeadInsts.emplace_back(UsePhi);
1845 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1846 << *WidePhi << "\n");
1847 }
1848 return nullptr;
1849 }
1850 }
1851
1852 // This narrow use can be widened by a sext if it's non-negative or its narrow
1853 // def was widened by a sext. Same for zext.
1854 auto canWidenBySExt = [&]() {
1855 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
1856 };
1857 auto canWidenByZExt = [&]() {
1858 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero;
1859 };
1860
1861 // Our raison d'etre! Eliminate sign and zero extension.
1862 if ((match(DU.NarrowUse, m_SExtLike(m_Value())) && canWidenBySExt()) ||
1863 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1864 Value *NewDef = DU.WideDef;
1865 if (DU.NarrowUse->getType() != WideType) {
1866 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1867 unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1868 if (CastWidth < IVWidth) {
1869 // The cast isn't as wide as the IV, so insert a Trunc.
1870 IRBuilder<> Builder(DU.NarrowUse);
1871 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1872 }
1873 else {
1874 // A wider extend was hidden behind a narrower one. This may induce
1875 // another round of IV widening in which the intermediate IV becomes
1876 // dead. It should be very rare.
1877 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1878 << " not wide enough to subsume " << *DU.NarrowUse
1879 << "\n");
1880 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1881 NewDef = DU.NarrowUse;
1882 }
1883 }
1884 if (NewDef != DU.NarrowUse) {
1885 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1886 << " replaced by " << *DU.WideDef << "\n");
1887 ++NumElimExt;
1888 DU.NarrowUse->replaceAllUsesWith(NewDef);
1889 DeadInsts.emplace_back(DU.NarrowUse);
1890 }
1891 // Now that the extend is gone, we want to expose it's uses for potential
1892 // further simplification. We don't need to directly inform SimplifyIVUsers
1893 // of the new users, because their parent IV will be processed later as a
1894 // new loop phi. If we preserved IVUsers analysis, we would also want to
1895 // push the uses of WideDef here.
1896
1897 // No further widening is needed. The deceased [sz]ext had done it for us.
1898 return nullptr;
1899 }
1900
1901 auto tryAddRecExpansion = [&]() -> Instruction* {
1902 // Does this user itself evaluate to a recurrence after widening?
1903 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1904 if (!WideAddRec.first)
1905 WideAddRec = getWideRecurrence(DU);
1906 assert((WideAddRec.first == nullptr) ==
1907 (WideAddRec.second == ExtendKind::Unknown));
1908 if (!WideAddRec.first)
1909 return nullptr;
1910
1911 // Reuse the IV increment that SCEVExpander created. Recompute flags, unless
1912 // the flags for both increments agree and it is safe to use the ones from
1913 // the original inc. In that case, the new use of the wide increment won't
1914 // be more poisonous.
1915 bool NeedToRecomputeFlags =
1917 DU.NarrowUse, WideInc) ||
1918 DU.NarrowUse->hasNoUnsignedWrap() != WideInc->hasNoUnsignedWrap() ||
1919 DU.NarrowUse->hasNoSignedWrap() != WideInc->hasNoSignedWrap();
1920 Instruction *WideUse = nullptr;
1921 if (WideAddRec.first == WideIncExpr &&
1922 Rewriter.hoistIVInc(WideInc, DU.NarrowUse, NeedToRecomputeFlags))
1923 WideUse = WideInc;
1924 else {
1925 WideUse = cloneIVUser(DU, WideAddRec.first);
1926 if (!WideUse)
1927 return nullptr;
1928 }
1929 // Evaluation of WideAddRec ensured that the narrow expression could be
1930 // extended outside the loop without overflow. This suggests that the wide use
1931 // evaluates to the same expression as the extended narrow use, but doesn't
1932 // absolutely guarantee it. Hence the following failsafe check. In rare cases
1933 // where it fails, we simply throw away the newly created wide use.
1934 if (WideAddRec.first != SE->getSCEV(WideUse)) {
1935 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1936 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1937 << "\n");
1938 DeadInsts.emplace_back(WideUse);
1939 return nullptr;
1940 };
1941
1942 // if we reached this point then we are going to replace
1943 // DU.NarrowUse with WideUse. Reattach DbgValue then.
1944 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1945
1946 ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1947 // Returning WideUse pushes it on the worklist.
1948 return WideUse;
1949 };
1950
1951 if (auto *I = tryAddRecExpansion())
1952 return I;
1953
1954 // If use is a loop condition, try to promote the condition instead of
1955 // truncating the IV first.
1956 if (widenLoopCompare(DU))
1957 return nullptr;
1958
1959 // We are here about to generate a truncate instruction that may hurt
1960 // performance because the scalar evolution expression computed earlier
1961 // in WideAddRec.first does not indicate a polynomial induction expression.
1962 // In that case, look at the operands of the use instruction to determine
1963 // if we can still widen the use instead of truncating its operand.
1964 if (widenWithVariantUse(DU))
1965 return nullptr;
1966
1967 // This user does not evaluate to a recurrence after widening, so don't
1968 // follow it. Instead insert a Trunc to kill off the original use,
1969 // eventually isolating the original narrow IV so it can be removed.
1970 truncateIVUse(DU, DT, LI);
1971 return nullptr;
1972}
1973
1974/// Add eligible users of NarrowDef to NarrowIVUsers.
1975void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1976 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1977 bool NonNegativeDef =
1978 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1979 SE->getZero(NarrowSCEV->getType()));
1980 for (User *U : NarrowDef->users()) {
1981 Instruction *NarrowUser = cast<Instruction>(U);
1982
1983 // Handle data flow merges and bizarre phi cycles.
1984 if (!Widened.insert(NarrowUser).second)
1985 continue;
1986
1987 bool NonNegativeUse = false;
1988 if (!NonNegativeDef) {
1989 // We might have a control-dependent range information for this context.
1990 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1991 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1992 }
1993
1994 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1995 NonNegativeDef || NonNegativeUse);
1996 }
1997}
1998
1999/// Process a single induction variable. First use the SCEVExpander to create a
2000/// wide induction variable that evaluates to the same recurrence as the
2001/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
2002/// def-use chain. After widenIVUse has processed all interesting IV users, the
2003/// narrow IV will be isolated for removal by DeleteDeadPHIs.
2004///
2005/// It would be simpler to delete uses as they are processed, but we must avoid
2006/// invalidating SCEV expressions.
2007PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
2008 // Is this phi an induction variable?
2009 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
2010 if (!AddRec)
2011 return nullptr;
2012
2013 // Widen the induction variable expression.
2014 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign
2015 ? SE->getSignExtendExpr(AddRec, WideType)
2016 : SE->getZeroExtendExpr(AddRec, WideType);
2017
2018 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
2019 "Expect the new IV expression to preserve its type");
2020
2021 // Can the IV be extended outside the loop without overflow?
2022 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
2023 if (!AddRec || AddRec->getLoop() != L)
2024 return nullptr;
2025
2026 // An AddRec must have loop-invariant operands. Since this AddRec is
2027 // materialized by a loop header phi, the expression cannot have any post-loop
2028 // operands, so they must dominate the loop header.
2029 assert(
2030 SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
2031 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
2032 "Loop header phi recurrence inputs do not dominate the loop");
2033
2034 // Iterate over IV uses (including transitive ones) looking for IV increments
2035 // of the form 'add nsw %iv, <const>'. For each increment and each use of
2036 // the increment calculate control-dependent range information basing on
2037 // dominating conditions inside of the loop (e.g. a range check inside of the
2038 // loop). Calculated ranges are stored in PostIncRangeInfos map.
2039 //
2040 // Control-dependent range information is later used to prove that a narrow
2041 // definition is not negative (see pushNarrowIVUsers). It's difficult to do
2042 // this on demand because when pushNarrowIVUsers needs this information some
2043 // of the dominating conditions might be already widened.
2045 calculatePostIncRanges(OrigPhi);
2046
2047 // The rewriter provides a value for the desired IV expression. This may
2048 // either find an existing phi or materialize a new one. Either way, we
2049 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
2050 // of the phi-SCC dominates the loop entry.
2051 Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
2052 Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
2053 // If the wide phi is not a phi node, for example a cast node, like bitcast,
2054 // inttoptr, ptrtoint, just skip for now.
2055 if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
2056 // if the cast node is an inserted instruction without any user, we should
2057 // remove it to make sure the pass don't touch the function as we can not
2058 // wide the phi.
2059 if (ExpandInst->hasNUses(0) &&
2060 Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
2061 DeadInsts.emplace_back(ExpandInst);
2062 return nullptr;
2063 }
2064
2065 // Remembering the WideIV increment generated by SCEVExpander allows
2066 // widenIVUse to reuse it when widening the narrow IV's increment. We don't
2067 // employ a general reuse mechanism because the call above is the only call to
2068 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
2069 if (BasicBlock *LatchBlock = L->getLoopLatch()) {
2070 WideInc =
2071 dyn_cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
2072 if (WideInc) {
2073 WideIncExpr = SE->getSCEV(WideInc);
2074 // Propagate the debug location associated with the original loop
2075 // increment to the new (widened) increment.
2076 auto *OrigInc =
2077 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
2078
2079 WideInc->setDebugLoc(OrigInc->getDebugLoc());
2080 // We are replacing a narrow IV increment with a wider IV increment. If
2081 // the original (narrow) increment did not wrap, the wider increment one
2082 // should not wrap either. Set the flags to be the union of both wide
2083 // increment and original increment; this ensures we preserve flags SCEV
2084 // could infer for the wider increment. Limit this only to cases where
2085 // both increments directly increment the corresponding PHI nodes and have
2086 // the same opcode. It is not safe to re-use the flags from the original
2087 // increment, if it is more complex and SCEV expansion may have yielded a
2088 // more simplified wider increment.
2090 OrigInc, WideInc) &&
2091 isa<OverflowingBinaryOperator>(OrigInc) &&
2092 isa<OverflowingBinaryOperator>(WideInc)) {
2093 WideInc->setHasNoUnsignedWrap(WideInc->hasNoUnsignedWrap() ||
2094 OrigInc->hasNoUnsignedWrap());
2095 WideInc->setHasNoSignedWrap(WideInc->hasNoSignedWrap() ||
2096 OrigInc->hasNoSignedWrap());
2097 }
2098 }
2099 }
2100
2101 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
2102 ++NumWidened;
2103
2104 // Traverse the def-use chain using a worklist starting at the original IV.
2105 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
2106
2107 Widened.insert(OrigPhi);
2108 pushNarrowIVUsers(OrigPhi, WidePhi);
2109
2110 while (!NarrowIVUsers.empty()) {
2111 WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
2112
2113 // Process a def-use edge. This may replace the use, so don't hold a
2114 // use_iterator across it.
2115 Instruction *WideUse = widenIVUse(DU, Rewriter, OrigPhi, WidePhi);
2116
2117 // Follow all def-use edges from the previous narrow use.
2118 if (WideUse)
2119 pushNarrowIVUsers(DU.NarrowUse, WideUse);
2120
2121 // widenIVUse may have removed the def-use edge.
2122 if (DU.NarrowDef->use_empty())
2123 DeadInsts.emplace_back(DU.NarrowDef);
2124 }
2125
2126 // Attach any debug information to the new PHI.
2127 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
2128
2129 return WidePhi;
2130}
2131
2132/// Calculates control-dependent range for the given def at the given context
2133/// by looking at dominating conditions inside of the loop
2134void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
2135 Instruction *NarrowUser) {
2136 Value *NarrowDefLHS;
2137 const APInt *NarrowDefRHS;
2138 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
2139 m_APInt(NarrowDefRHS))) ||
2140 !NarrowDefRHS->isNonNegative())
2141 return;
2142
2143 auto UpdateRangeFromCondition = [&] (Value *Condition,
2144 bool TrueDest) {
2145 CmpInst::Predicate Pred;
2146 Value *CmpRHS;
2147 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
2148 m_Value(CmpRHS))))
2149 return;
2150
2152 TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
2153
2154 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
2155 auto CmpConstrainedLHSRange =
2157 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
2159
2160 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
2161 };
2162
2163 auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
2164 if (!HasGuards)
2165 return;
2166
2167 for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
2168 Ctx->getParent()->rend())) {
2169 Value *C = nullptr;
2170 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
2171 UpdateRangeFromCondition(C, /*TrueDest=*/true);
2172 }
2173 };
2174
2175 UpdateRangeFromGuards(NarrowUser);
2176
2177 BasicBlock *NarrowUserBB = NarrowUser->getParent();
2178 // If NarrowUserBB is statically unreachable asking dominator queries may
2179 // yield surprising results. (e.g. the block may not have a dom tree node)
2180 if (!DT->isReachableFromEntry(NarrowUserBB))
2181 return;
2182
2183 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
2184 L->contains(DTB->getBlock());
2185 DTB = DTB->getIDom()) {
2186 auto *BB = DTB->getBlock();
2187 auto *TI = BB->getTerminator();
2188 UpdateRangeFromGuards(TI);
2189
2190 auto *BI = dyn_cast<BranchInst>(TI);
2191 if (!BI || !BI->isConditional())
2192 continue;
2193
2194 auto *TrueSuccessor = BI->getSuccessor(0);
2195 auto *FalseSuccessor = BI->getSuccessor(1);
2196
2197 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
2198 return BBE.isSingleEdge() &&
2199 DT->dominates(BBE, NarrowUser->getParent());
2200 };
2201
2202 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
2203 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
2204
2205 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
2206 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
2207 }
2208}
2209
2210/// Calculates PostIncRangeInfos map for the given IV
2211void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
2214 Worklist.push_back(OrigPhi);
2215 Visited.insert(OrigPhi);
2216
2217 while (!Worklist.empty()) {
2218 Instruction *NarrowDef = Worklist.pop_back_val();
2219
2220 for (Use &U : NarrowDef->uses()) {
2221 auto *NarrowUser = cast<Instruction>(U.getUser());
2222
2223 // Don't go looking outside the current loop.
2224 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
2225 if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
2226 continue;
2227
2228 if (!Visited.insert(NarrowUser).second)
2229 continue;
2230
2231 Worklist.push_back(NarrowUser);
2232
2233 calculatePostIncRange(NarrowDef, NarrowUser);
2234 }
2235 }
2236}
2237
2239 LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter,
2241 unsigned &NumElimExt, unsigned &NumWidened,
2242 bool HasGuards, bool UsePostIncrementRanges) {
2243 WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges);
2244 PHINode *WidePHI = Widener.createWideIV(Rewriter);
2245 NumElimExt = Widener.getNumElimExt();
2246 NumWidened = Widener.getNumWidened();
2247 return WidePHI;
2248}
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
Rewrite undef for PHI
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define DEBUG_TYPE
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
static cl::opt< bool > WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true), cl::desc("Widen the loop induction variables, if possible, so " "overflow checks won't reject flattening"))
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI)
This IV user cannot be widened.
static Instruction * GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint)
static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE)
Return true if this instruction generates a simple SCEV expression in terms of that IV.
static Instruction * findCommonDominator(ArrayRef< Instruction * > Instructions, DominatorTree &DT)
Find a point in code which dominates all given instructions.
static void pushIVUsers(Instruction *Def, Loop *L, SmallPtrSet< Instruction *, 16 > &Simplified, SmallVectorImpl< std::pair< Instruction *, Instruction * > > &SimpleIVUsers)
Add all uses of Def to the current IV's worklist.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static std::optional< BinaryOp > matchBinaryOp(Instruction *Op)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
Class for arbitrary precision integers.
Definition: APInt.h:76
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:312
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:264
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:396
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
Value * getRHS() const
bool isSigned() const
Whether the intrinsic is signed or unsigned.
Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
Value * getLHS() const
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Definition: InstrTypes.h:491
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:579
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:1069
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:965
bool isSigned() const
Definition: InstrTypes.h:1226
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1090
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1066
bool isUnsigned() const
Definition: InstrTypes.h:1232
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:863
This class represents a range of values.
Definition: ConstantRange.h:47
APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:344
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2006
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
Interface for visiting interesting IV users that are recognized but not simplified by this utility.
virtual void anchor()
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const BasicBlock * getParent() const
Definition: Instruction.h:151
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:251
void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:450
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition: Operator.h:75
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition: Operator.h:108
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition: Operator.h:102
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class uses information about analyze scalars to rewrite expressions in canonical form.
static bool canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi, PHINode *WidePhi, Instruction *OrigInc, Instruction *WideInc)
Return true if both increments directly increment the corresponding IV PHI nodes and have the same op...
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
Represents a saturating add/sub intrinsic.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the given type, for which isSCEVable must return true.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask)
Convenient NoWrapFlags manipulation that hides enum casts and is visible in the ScalarEvolution name ...
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Represents an op.with.overflow intrinsic.
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Key
PAL metadata keys.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:821
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:294
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead)
SimplifyLoopIVs - Simplify users of induction variables within this loop.
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2693
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:77
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Collect information about induction variables that are used by sign/zero extend operations.