LLVM 19.0.0git
SimplifyIndVar.cpp
Go to the documentation of this file.
1//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements induction variable simplification. It does
10// not define any actual pass or policy, but provides a single function to
11// simplify a loop's induction variables based on ScalarEvolution.
12//
13//===----------------------------------------------------------------------===//
14
17#include "llvm/ADT/Statistic.h"
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/IRBuilder.h"
25#include "llvm/Support/Debug.h"
30
31using namespace llvm;
32using namespace llvm::PatternMatch;
33
34#define DEBUG_TYPE "indvars"
35
36STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
37STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
38STATISTIC(NumFoldedUser, "Number of IV users folded into a constant");
39STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
41 NumSimplifiedSDiv,
42 "Number of IV signed division operations converted to unsigned division");
44 NumSimplifiedSRem,
45 "Number of IV signed remainder operations converted to unsigned remainder");
46STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
47
48namespace {
49 /// This is a utility for simplifying induction variables
50 /// based on ScalarEvolution. It is the primary instrument of the
51 /// IndvarSimplify pass, but it may also be directly invoked to cleanup after
52 /// other loop passes that preserve SCEV.
53 class SimplifyIndvar {
54 Loop *L;
55 LoopInfo *LI;
57 DominatorTree *DT;
61
62 bool Changed = false;
63 bool RunUnswitching = false;
64
65 public:
66 SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
68 SCEVExpander &Rewriter,
70 : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter),
71 DeadInsts(Dead) {
72 assert(LI && "IV simplification requires LoopInfo");
73 }
74
75 bool hasChanged() const { return Changed; }
76 bool runUnswitching() const { return RunUnswitching; }
77
78 /// Iteratively perform simplification on a worklist of users of the
79 /// specified induction variable. This is the top-level driver that applies
80 /// all simplifications to users of an IV.
81 void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
82
83 Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
84
85 bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
86 bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
87 bool replaceFloatIVWithIntegerIV(Instruction *UseInst);
88
89 bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
90 bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
91 bool eliminateTrunc(TruncInst *TI);
92 bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
93 bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
94 void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
95 void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
96 bool IsSigned);
97 void replaceRemWithNumerator(BinaryOperator *Rem);
98 void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
99 void replaceSRemWithURem(BinaryOperator *Rem);
100 bool eliminateSDiv(BinaryOperator *SDiv);
101 bool strengthenBinaryOp(BinaryOperator *BO, Instruction *IVOperand);
102 bool strengthenOverflowingOperation(BinaryOperator *OBO,
103 Instruction *IVOperand);
104 bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand);
105 };
106}
107
108/// Find a point in code which dominates all given instructions. We can safely
109/// assume that, whatever fact we can prove at the found point, this fact is
110/// also true for each of the given instructions.
112 DominatorTree &DT) {
113 Instruction *CommonDom = nullptr;
114 for (auto *Insn : Instructions)
115 CommonDom =
116 CommonDom ? DT.findNearestCommonDominator(CommonDom, Insn) : Insn;
117 assert(CommonDom && "Common dominator not found?");
118 return CommonDom;
119}
120
121/// Fold an IV operand into its use. This removes increments of an
122/// aligned IV when used by a instruction that ignores the low bits.
123///
124/// IVOperand is guaranteed SCEVable, but UseInst may not be.
125///
126/// Return the operand of IVOperand for this induction variable if IVOperand can
127/// be folded (in case more folding opportunities have been exposed).
128/// Otherwise return null.
129Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
130 Value *IVSrc = nullptr;
131 const unsigned OperIdx = 0;
132 const SCEV *FoldedExpr = nullptr;
133 bool MustDropExactFlag = false;
134 switch (UseInst->getOpcode()) {
135 default:
136 return nullptr;
137 case Instruction::UDiv:
138 case Instruction::LShr:
139 // We're only interested in the case where we know something about
140 // the numerator and have a constant denominator.
141 if (IVOperand != UseInst->getOperand(OperIdx) ||
142 !isa<ConstantInt>(UseInst->getOperand(1)))
143 return nullptr;
144
145 // Attempt to fold a binary operator with constant operand.
146 // e.g. ((I + 1) >> 2) => I >> 2
147 if (!isa<BinaryOperator>(IVOperand)
148 || !isa<ConstantInt>(IVOperand->getOperand(1)))
149 return nullptr;
150
151 IVSrc = IVOperand->getOperand(0);
152 // IVSrc must be the (SCEVable) IV, since the other operand is const.
153 assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
154
155 ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
156 if (UseInst->getOpcode() == Instruction::LShr) {
157 // Get a constant for the divisor. See createSCEV.
158 uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
159 if (D->getValue().uge(BitWidth))
160 return nullptr;
161
162 D = ConstantInt::get(UseInst->getContext(),
163 APInt::getOneBitSet(BitWidth, D->getZExtValue()));
164 }
165 const auto *LHS = SE->getSCEV(IVSrc);
166 const auto *RHS = SE->getSCEV(D);
167 FoldedExpr = SE->getUDivExpr(LHS, RHS);
168 // We might have 'exact' flag set at this point which will no longer be
169 // correct after we make the replacement.
170 if (UseInst->isExact() && LHS != SE->getMulExpr(FoldedExpr, RHS))
171 MustDropExactFlag = true;
172 }
173 // We have something that might fold it's operand. Compare SCEVs.
174 if (!SE->isSCEVable(UseInst->getType()))
175 return nullptr;
176
177 // Bypass the operand if SCEV can prove it has no effect.
178 if (SE->getSCEV(UseInst) != FoldedExpr)
179 return nullptr;
180
181 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
182 << " -> " << *UseInst << '\n');
183
184 UseInst->setOperand(OperIdx, IVSrc);
185 assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
186
187 if (MustDropExactFlag)
188 UseInst->dropPoisonGeneratingFlags();
189
190 ++NumElimOperand;
191 Changed = true;
192 if (IVOperand->use_empty())
193 DeadInsts.emplace_back(IVOperand);
194 return IVSrc;
195}
196
197bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
198 Instruction *IVOperand) {
199 auto *Preheader = L->getLoopPreheader();
200 if (!Preheader)
201 return false;
202 unsigned IVOperIdx = 0;
203 ICmpInst::Predicate Pred = ICmp->getPredicate();
204 if (IVOperand != ICmp->getOperand(0)) {
205 // Swapped
206 assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
207 IVOperIdx = 1;
208 Pred = ICmpInst::getSwappedPredicate(Pred);
209 }
210
211 // Get the SCEVs for the ICmp operands (in the specific context of the
212 // current loop)
213 const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
214 const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
215 const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
216 auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L, ICmp);
217 if (!LIP)
218 return false;
219 ICmpInst::Predicate InvariantPredicate = LIP->Pred;
220 const SCEV *InvariantLHS = LIP->LHS;
221 const SCEV *InvariantRHS = LIP->RHS;
222
223 // Do not generate something ridiculous.
224 auto *PHTerm = Preheader->getTerminator();
225 if (Rewriter.isHighCostExpansion({InvariantLHS, InvariantRHS}, L,
226 2 * SCEVCheapExpansionBudget, TTI, PHTerm) ||
227 !Rewriter.isSafeToExpandAt(InvariantLHS, PHTerm) ||
228 !Rewriter.isSafeToExpandAt(InvariantRHS, PHTerm))
229 return false;
230 auto *NewLHS =
231 Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm);
232 auto *NewRHS =
233 Rewriter.expandCodeFor(InvariantRHS, IVOperand->getType(), PHTerm);
234 LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
235 ICmp->setPredicate(InvariantPredicate);
236 ICmp->setOperand(0, NewLHS);
237 ICmp->setOperand(1, NewRHS);
238 RunUnswitching = true;
239 return true;
240}
241
242/// SimplifyIVUsers helper for eliminating useless
243/// comparisons against an induction variable.
244void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
245 Instruction *IVOperand) {
246 unsigned IVOperIdx = 0;
247 ICmpInst::Predicate Pred = ICmp->getPredicate();
248 ICmpInst::Predicate OriginalPred = Pred;
249 if (IVOperand != ICmp->getOperand(0)) {
250 // Swapped
251 assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
252 IVOperIdx = 1;
253 Pred = ICmpInst::getSwappedPredicate(Pred);
254 }
255
256 // Get the SCEVs for the ICmp operands (in the specific context of the
257 // current loop)
258 const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
259 const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
260 const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
261
262 // If the condition is always true or always false in the given context,
263 // replace it with a constant value.
265 for (auto *U : ICmp->users())
266 Users.push_back(cast<Instruction>(U));
267 const Instruction *CtxI = findCommonDominator(Users, *DT);
268 if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) {
269 SE->forgetValue(ICmp);
271 DeadInsts.emplace_back(ICmp);
272 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
273 } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
274 // fallthrough to end of function
275 } else if (ICmpInst::isSigned(OriginalPred) &&
276 SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
277 // If we were unable to make anything above, all we can is to canonicalize
278 // the comparison hoping that it will open the doors for other
279 // optimizations. If we find out that we compare two non-negative values,
280 // we turn the instruction's predicate to its unsigned version. Note that
281 // we cannot rely on Pred here unless we check if we have swapped it.
282 assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
283 LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
284 << '\n');
285 ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
286 } else
287 return;
288
289 ++NumElimCmp;
290 Changed = true;
291}
292
293bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
294 // Get the SCEVs for the ICmp operands.
295 auto *N = SE->getSCEV(SDiv->getOperand(0));
296 auto *D = SE->getSCEV(SDiv->getOperand(1));
297
298 // Simplify unnecessary loops away.
299 const Loop *L = LI->getLoopFor(SDiv->getParent());
300 N = SE->getSCEVAtScope(N, L);
301 D = SE->getSCEVAtScope(D, L);
302
303 // Replace sdiv by udiv if both of the operands are non-negative
304 if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
305 auto *UDiv = BinaryOperator::Create(
306 BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
307 SDiv->getName() + ".udiv", SDiv->getIterator());
308 UDiv->setIsExact(SDiv->isExact());
309 SDiv->replaceAllUsesWith(UDiv);
310 LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
311 ++NumSimplifiedSDiv;
312 Changed = true;
313 DeadInsts.push_back(SDiv);
314 return true;
315 }
316
317 return false;
318}
319
320// i %s n -> i %u n if i >= 0 and n >= 0
321void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {
322 auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
323 auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D,
324 Rem->getName() + ".urem", Rem->getIterator());
325 Rem->replaceAllUsesWith(URem);
326 LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');
327 ++NumSimplifiedSRem;
328 Changed = true;
329 DeadInsts.emplace_back(Rem);
330}
331
332// i % n --> i if i is in [0,n).
333void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {
334 Rem->replaceAllUsesWith(Rem->getOperand(0));
335 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
336 ++NumElimRem;
337 Changed = true;
338 DeadInsts.emplace_back(Rem);
339}
340
341// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
342void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
343 auto *T = Rem->getType();
344 auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
345 ICmpInst *ICmp = new ICmpInst(Rem->getIterator(), ICmpInst::ICMP_EQ, N, D);
346 SelectInst *Sel =
347 SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem->getIterator());
348 Rem->replaceAllUsesWith(Sel);
349 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
350 ++NumElimRem;
351 Changed = true;
352 DeadInsts.emplace_back(Rem);
353}
354
355/// SimplifyIVUsers helper for eliminating useless remainder operations
356/// operating on an induction variable or replacing srem by urem.
357void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem,
358 Instruction *IVOperand,
359 bool IsSigned) {
360 auto *NValue = Rem->getOperand(0);
361 auto *DValue = Rem->getOperand(1);
362 // We're only interested in the case where we know something about
363 // the numerator, unless it is a srem, because we want to replace srem by urem
364 // in general.
365 bool UsedAsNumerator = IVOperand == NValue;
366 if (!UsedAsNumerator && !IsSigned)
367 return;
368
369 const SCEV *N = SE->getSCEV(NValue);
370
371 // Simplify unnecessary loops away.
372 const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
373 N = SE->getSCEVAtScope(N, ICmpLoop);
374
375 bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N);
376
377 // Do not proceed if the Numerator may be negative
378 if (!IsNumeratorNonNegative)
379 return;
380
381 const SCEV *D = SE->getSCEV(DValue);
382 D = SE->getSCEVAtScope(D, ICmpLoop);
383
384 if (UsedAsNumerator) {
385 auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
386 if (SE->isKnownPredicate(LT, N, D)) {
387 replaceRemWithNumerator(Rem);
388 return;
389 }
390
391 auto *T = Rem->getType();
392 const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T));
393 if (SE->isKnownPredicate(LT, NLessOne, D)) {
394 replaceRemWithNumeratorOrZero(Rem);
395 return;
396 }
397 }
398
399 // Try to replace SRem with URem, if both N and D are known non-negative.
400 // Since we had already check N, we only need to check D now
401 if (!IsSigned || !SE->isKnownNonNegative(D))
402 return;
403
404 replaceSRemWithURem(Rem);
405}
406
407bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
408 const SCEV *LHS = SE->getSCEV(WO->getLHS());
409 const SCEV *RHS = SE->getSCEV(WO->getRHS());
410 if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
411 return false;
412
413 // Proved no overflow, nuke the overflow check and, if possible, the overflow
414 // intrinsic as well.
415
417 WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO->getIterator());
418
419 if (WO->isSigned())
420 NewResult->setHasNoSignedWrap(true);
421 else
422 NewResult->setHasNoUnsignedWrap(true);
423
425
426 for (auto *U : WO->users()) {
427 if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
428 if (EVI->getIndices()[0] == 1)
429 EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext()));
430 else {
431 assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
432 EVI->replaceAllUsesWith(NewResult);
433 }
434 ToDelete.push_back(EVI);
435 }
436 }
437
438 for (auto *EVI : ToDelete)
439 EVI->eraseFromParent();
440
441 if (WO->use_empty())
442 WO->eraseFromParent();
443
444 Changed = true;
445 return true;
446}
447
448bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) {
449 const SCEV *LHS = SE->getSCEV(SI->getLHS());
450 const SCEV *RHS = SE->getSCEV(SI->getRHS());
451 if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
452 return false;
453
455 SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI->getIterator());
456 if (SI->isSigned())
457 BO->setHasNoSignedWrap();
458 else
460
461 SI->replaceAllUsesWith(BO);
462 DeadInsts.emplace_back(SI);
463 Changed = true;
464 return true;
465}
466
467bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
468 // It is always legal to replace
469 // icmp <pred> i32 trunc(iv), n
470 // with
471 // icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate.
472 // Or with
473 // icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate.
474 // Or with either of these if pred is an equality predicate.
475 //
476 // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
477 // every comparison which uses trunc, it means that we can replace each of
478 // them with comparison of iv against sext/zext(n). We no longer need trunc
479 // after that.
480 //
481 // TODO: Should we do this if we can widen *some* comparisons, but not all
482 // of them? Sometimes it is enough to enable other optimizations, but the
483 // trunc instruction will stay in the loop.
484 Value *IV = TI->getOperand(0);
485 Type *IVTy = IV->getType();
486 const SCEV *IVSCEV = SE->getSCEV(IV);
487 const SCEV *TISCEV = SE->getSCEV(TI);
488
489 // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
490 // get rid of trunc
491 bool DoesSExtCollapse = false;
492 bool DoesZExtCollapse = false;
493 if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy))
494 DoesSExtCollapse = true;
495 if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy))
496 DoesZExtCollapse = true;
497
498 // If neither sext nor zext does collapse, it is not profitable to do any
499 // transform. Bail.
500 if (!DoesSExtCollapse && !DoesZExtCollapse)
501 return false;
502
503 // Collect users of the trunc that look like comparisons against invariants.
504 // Bail if we find something different.
506 for (auto *U : TI->users()) {
507 // We don't care about users in unreachable blocks.
508 if (isa<Instruction>(U) &&
509 !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
510 continue;
511 ICmpInst *ICI = dyn_cast<ICmpInst>(U);
512 if (!ICI) return false;
513 assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
514 if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
515 !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
516 return false;
517 // If we cannot get rid of trunc, bail.
518 if (ICI->isSigned() && !DoesSExtCollapse)
519 return false;
520 if (ICI->isUnsigned() && !DoesZExtCollapse)
521 return false;
522 // For equality, either signed or unsigned works.
523 ICmpUsers.push_back(ICI);
524 }
525
526 auto CanUseZExt = [&](ICmpInst *ICI) {
527 // Unsigned comparison can be widened as unsigned.
528 if (ICI->isUnsigned())
529 return true;
530 // Is it profitable to do zext?
531 if (!DoesZExtCollapse)
532 return false;
533 // For equality, we can safely zext both parts.
534 if (ICI->isEquality())
535 return true;
536 // Otherwise we can only use zext when comparing two non-negative or two
537 // negative values. But in practice, we will never pass DoesZExtCollapse
538 // check for a negative value, because zext(trunc(x)) is non-negative. So
539 // it only make sense to check for non-negativity here.
540 const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0));
541 const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1));
542 return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2);
543 };
544 // Replace all comparisons against trunc with comparisons against IV.
545 for (auto *ICI : ICmpUsers) {
546 bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
547 auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
548 IRBuilder<> Builder(ICI);
549 Value *Ext = nullptr;
550 // For signed/unsigned predicate, replace the old comparison with comparison
551 // of immediate IV against sext/zext of the invariant argument. If we can
552 // use either sext or zext (i.e. we are dealing with equality predicate),
553 // then prefer zext as a more canonical form.
554 // TODO: If we see a signed comparison which can be turned into unsigned,
555 // we can do it here for canonicalization purposes.
556 ICmpInst::Predicate Pred = ICI->getPredicate();
557 if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
558 if (CanUseZExt(ICI)) {
559 assert(DoesZExtCollapse && "Unprofitable zext?");
560 Ext = Builder.CreateZExt(Op1, IVTy, "zext");
562 } else {
563 assert(DoesSExtCollapse && "Unprofitable sext?");
564 Ext = Builder.CreateSExt(Op1, IVTy, "sext");
565 assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
566 }
567 bool Changed;
568 L->makeLoopInvariant(Ext, Changed);
569 (void)Changed;
570 auto *NewCmp = Builder.CreateICmp(Pred, IV, Ext);
571 ICI->replaceAllUsesWith(NewCmp);
572 DeadInsts.emplace_back(ICI);
573 }
574
575 // Trunc no longer needed.
577 DeadInsts.emplace_back(TI);
578 return true;
579}
580
581/// Eliminate an operation that consumes a simple IV and has no observable
582/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
583/// but UseInst may not be.
584bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
585 Instruction *IVOperand) {
586 if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
587 eliminateIVComparison(ICmp, IVOperand);
588 return true;
589 }
590 if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) {
591 bool IsSRem = Bin->getOpcode() == Instruction::SRem;
592 if (IsSRem || Bin->getOpcode() == Instruction::URem) {
593 simplifyIVRemainder(Bin, IVOperand, IsSRem);
594 return true;
595 }
596
597 if (Bin->getOpcode() == Instruction::SDiv)
598 return eliminateSDiv(Bin);
599 }
600
601 if (auto *WO = dyn_cast<WithOverflowInst>(UseInst))
602 if (eliminateOverflowIntrinsic(WO))
603 return true;
604
605 if (auto *SI = dyn_cast<SaturatingInst>(UseInst))
606 if (eliminateSaturatingIntrinsic(SI))
607 return true;
608
609 if (auto *TI = dyn_cast<TruncInst>(UseInst))
610 if (eliminateTrunc(TI))
611 return true;
612
613 if (eliminateIdentitySCEV(UseInst, IVOperand))
614 return true;
615
616 return false;
617}
618
620 if (auto *BB = L->getLoopPreheader())
621 return BB->getTerminator();
622
623 return Hint;
624}
625
626/// Replace the UseInst with a loop invariant expression if it is safe.
627bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
628 if (!SE->isSCEVable(I->getType()))
629 return false;
630
631 // Get the symbolic expression for this instruction.
632 const SCEV *S = SE->getSCEV(I);
633
634 if (!SE->isLoopInvariant(S, L))
635 return false;
636
637 // Do not generate something ridiculous even if S is loop invariant.
638 if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I))
639 return false;
640
641 auto *IP = GetLoopInvariantInsertPosition(L, I);
642
643 if (!Rewriter.isSafeToExpandAt(S, IP)) {
644 LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I
645 << " with non-speculable loop invariant: " << *S << '\n');
646 return false;
647 }
648
649 auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);
650 bool NeedToEmitLCSSAPhis = false;
651 if (!LI->replacementPreservesLCSSAForm(I, Invariant))
652 NeedToEmitLCSSAPhis = true;
653
654 I->replaceAllUsesWith(Invariant);
655 LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
656 << " with loop invariant: " << *S << '\n');
657
658 if (NeedToEmitLCSSAPhis) {
659 SmallVector<Instruction *, 1> NeedsLCSSAPhis;
660 NeedsLCSSAPhis.push_back(cast<Instruction>(Invariant));
661 formLCSSAForInstructions(NeedsLCSSAPhis, *DT, *LI, SE);
662 LLVM_DEBUG(dbgs() << " INDVARS: Replacement breaks LCSSA form"
663 << " inserting LCSSA Phis" << '\n');
664 }
665 ++NumFoldedUser;
666 Changed = true;
667 DeadInsts.emplace_back(I);
668 return true;
669}
670
671/// Eliminate redundant type cast between integer and float.
672bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
673 if (UseInst->getOpcode() != CastInst::SIToFP &&
674 UseInst->getOpcode() != CastInst::UIToFP)
675 return false;
676
677 Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0));
678 // Get the symbolic expression for this instruction.
679 const SCEV *IV = SE->getSCEV(IVOperand);
680 int MaskBits;
681 if (UseInst->getOpcode() == CastInst::SIToFP)
682 MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits();
683 else
684 MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits();
685 int DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
686 if (MaskBits <= DestNumSigBits) {
687 for (User *U : UseInst->users()) {
688 // Match for fptosi/fptoui of sitofp and with same type.
689 auto *CI = dyn_cast<CastInst>(U);
690 if (!CI)
691 continue;
692
693 CastInst::CastOps Opcode = CI->getOpcode();
694 if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI)
695 continue;
696
697 Value *Conv = nullptr;
698 if (IVOperand->getType() != CI->getType()) {
699 IRBuilder<> Builder(CI);
700 StringRef Name = IVOperand->getName();
701 // To match InstCombine logic, we only need sext if both fptosi and
702 // sitofp are used. If one of them is unsigned, then we can use zext.
703 if (SE->getTypeSizeInBits(IVOperand->getType()) >
704 SE->getTypeSizeInBits(CI->getType())) {
705 Conv = Builder.CreateTrunc(IVOperand, CI->getType(), Name + ".trunc");
706 } else if (Opcode == CastInst::FPToUI ||
707 UseInst->getOpcode() == CastInst::UIToFP) {
708 Conv = Builder.CreateZExt(IVOperand, CI->getType(), Name + ".zext");
709 } else {
710 Conv = Builder.CreateSExt(IVOperand, CI->getType(), Name + ".sext");
711 }
712 } else
713 Conv = IVOperand;
714
715 CI->replaceAllUsesWith(Conv);
716 DeadInsts.push_back(CI);
717 LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
718 << " with: " << *Conv << '\n');
719
720 ++NumFoldedUser;
721 Changed = true;
722 }
723 }
724
725 return Changed;
726}
727
728/// Eliminate any operation that SCEV can prove is an identity function.
729bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
730 Instruction *IVOperand) {
731 if (!SE->isSCEVable(UseInst->getType()) ||
732 UseInst->getType() != IVOperand->getType())
733 return false;
734
735 const SCEV *UseSCEV = SE->getSCEV(UseInst);
736 if (UseSCEV != SE->getSCEV(IVOperand))
737 return false;
738
739 // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
740 // dominator tree, even if X is an operand to Y. For instance, in
741 //
742 // %iv = phi i32 {0,+,1}
743 // br %cond, label %left, label %merge
744 //
745 // left:
746 // %X = add i32 %iv, 0
747 // br label %merge
748 //
749 // merge:
750 // %M = phi (%X, %iv)
751 //
752 // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
753 // %M.replaceAllUsesWith(%X) would be incorrect.
754
755 if (isa<PHINode>(UseInst))
756 // If UseInst is not a PHI node then we know that IVOperand dominates
757 // UseInst directly from the legality of SSA.
758 if (!DT || !DT->dominates(IVOperand, UseInst))
759 return false;
760
761 if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
762 return false;
763
764 // Make sure the operand is not more poisonous than the instruction.
765 if (!impliesPoison(IVOperand, UseInst)) {
766 SmallVector<Instruction *> DropPoisonGeneratingInsts;
767 if (!SE->canReuseInstruction(UseSCEV, IVOperand, DropPoisonGeneratingInsts))
768 return false;
769
770 for (Instruction *I : DropPoisonGeneratingInsts)
771 I->dropPoisonGeneratingAnnotations();
772 }
773
774 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
775
776 SE->forgetValue(UseInst);
777 UseInst->replaceAllUsesWith(IVOperand);
778 ++NumElimIdentity;
779 Changed = true;
780 DeadInsts.emplace_back(UseInst);
781 return true;
782}
783
784bool SimplifyIndvar::strengthenBinaryOp(BinaryOperator *BO,
785 Instruction *IVOperand) {
786 return (isa<OverflowingBinaryOperator>(BO) &&
787 strengthenOverflowingOperation(BO, IVOperand)) ||
788 (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand));
789}
790
791/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
792/// unsigned-overflow. Returns true if anything changed, false otherwise.
793bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
794 Instruction *IVOperand) {
795 auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp(
796 cast<OverflowingBinaryOperator>(BO));
797
798 if (!Flags)
799 return false;
800
805
806 // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
807 // flags on addrecs while performing zero/sign extensions. We could call
808 // forgetValue() here to make sure those flags also propagate to any other
809 // SCEV expressions based on the addrec. However, this can have pathological
810 // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
811 return true;
812}
813
814/// Annotate the Shr in (X << IVOperand) >> C as exact using the
815/// information from the IV's range. Returns true if anything changed, false
816/// otherwise.
817bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
818 Instruction *IVOperand) {
819 if (BO->getOpcode() == Instruction::Shl) {
820 bool Changed = false;
821 ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
822 for (auto *U : BO->users()) {
823 const APInt *C;
824 if (match(U,
825 m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
826 match(U,
827 m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
828 BinaryOperator *Shr = cast<BinaryOperator>(U);
829 if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
830 Shr->setIsExact(true);
831 Changed = true;
832 }
833 }
834 }
835 return Changed;
836 }
837
838 return false;
839}
840
841/// Add all uses of Def to the current IV's worklist.
842static void pushIVUsers(
843 Instruction *Def, Loop *L,
845 SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
846
847 for (User *U : Def->users()) {
848 Instruction *UI = cast<Instruction>(U);
849
850 // Avoid infinite or exponential worklist processing.
851 // Also ensure unique worklist users.
852 // If Def is a LoopPhi, it may not be in the Simplified set, so check for
853 // self edges first.
854 if (UI == Def)
855 continue;
856
857 // Only change the current Loop, do not change the other parts (e.g. other
858 // Loops).
859 if (!L->contains(UI))
860 continue;
861
862 // Do not push the same instruction more than once.
863 if (!Simplified.insert(UI).second)
864 continue;
865
866 SimpleIVUsers.push_back(std::make_pair(UI, Def));
867 }
868}
869
870/// Return true if this instruction generates a simple SCEV
871/// expression in terms of that IV.
872///
873/// This is similar to IVUsers' isInteresting() but processes each instruction
874/// non-recursively when the operand is already known to be a simpleIVUser.
875///
876static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
877 if (!SE->isSCEVable(I->getType()))
878 return false;
879
880 // Get the symbolic expression for this instruction.
881 const SCEV *S = SE->getSCEV(I);
882
883 // Only consider affine recurrences.
884 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
885 if (AR && AR->getLoop() == L)
886 return true;
887
888 return false;
889}
890
891/// Iteratively perform simplification on a worklist of users
892/// of the specified induction variable. Each successive simplification may push
893/// more users which may themselves be candidates for simplification.
894///
895/// This algorithm does not require IVUsers analysis. Instead, it simplifies
896/// instructions in-place during analysis. Rather than rewriting induction
897/// variables bottom-up from their users, it transforms a chain of IVUsers
898/// top-down, updating the IR only when it encounters a clear optimization
899/// opportunity.
900///
901/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
902///
903void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
904 if (!SE->isSCEVable(CurrIV->getType()))
905 return;
906
907 // Instructions processed by SimplifyIndvar for CurrIV.
909
910 // Use-def pairs if IV users waiting to be processed for CurrIV.
912
913 // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
914 // called multiple times for the same LoopPhi. This is the proper thing to
915 // do for loop header phis that use each other.
916 pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers);
917
918 while (!SimpleIVUsers.empty()) {
919 std::pair<Instruction*, Instruction*> UseOper =
920 SimpleIVUsers.pop_back_val();
921 Instruction *UseInst = UseOper.first;
922
923 // If a user of the IndVar is trivially dead, we prefer just to mark it dead
924 // rather than try to do some complex analysis or transformation (such as
925 // widening) basing on it.
926 // TODO: Propagate TLI and pass it here to handle more cases.
927 if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) {
928 DeadInsts.emplace_back(UseInst);
929 continue;
930 }
931
932 // Bypass back edges to avoid extra work.
933 if (UseInst == CurrIV) continue;
934
935 // Try to replace UseInst with a loop invariant before any other
936 // simplifications.
937 if (replaceIVUserWithLoopInvariant(UseInst))
938 continue;
939
940 // Go further for the bitcast 'prtoint ptr to i64' or if the cast is done
941 // by truncation
942 if ((isa<PtrToIntInst>(UseInst)) || (isa<TruncInst>(UseInst)))
943 for (Use &U : UseInst->uses()) {
944 Instruction *User = cast<Instruction>(U.getUser());
945 if (replaceIVUserWithLoopInvariant(User))
946 break; // done replacing
947 }
948
949 Instruction *IVOperand = UseOper.second;
950 for (unsigned N = 0; IVOperand; ++N) {
951 assert(N <= Simplified.size() && "runaway iteration");
952 (void) N;
953
954 Value *NewOper = foldIVUser(UseInst, IVOperand);
955 if (!NewOper)
956 break; // done folding
957 IVOperand = dyn_cast<Instruction>(NewOper);
958 }
959 if (!IVOperand)
960 continue;
961
962 if (eliminateIVUser(UseInst, IVOperand)) {
963 pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
964 continue;
965 }
966
967 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
968 if (strengthenBinaryOp(BO, IVOperand)) {
969 // re-queue uses of the now modified binary operator and fall
970 // through to the checks that remain.
971 pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
972 }
973 }
974
975 // Try to use integer induction for FPToSI of float induction directly.
976 if (replaceFloatIVWithIntegerIV(UseInst)) {
977 // Re-queue the potentially new direct uses of IVOperand.
978 pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
979 continue;
980 }
981
982 CastInst *Cast = dyn_cast<CastInst>(UseInst);
983 if (V && Cast) {
984 V->visitCast(Cast);
985 continue;
986 }
987 if (isSimpleIVUser(UseInst, L, SE)) {
988 pushIVUsers(UseInst, L, Simplified, SimpleIVUsers);
989 }
990 }
991}
992
993namespace llvm {
994
996
997/// Simplify instructions that use this induction variable
998/// by using ScalarEvolution to analyze the IV's recurrence.
999/// Returns a pair where the first entry indicates that the function makes
1000/// changes and the second entry indicates that it introduced new opportunities
1001/// for loop unswitching.
1002std::pair<bool, bool> simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE,
1003 DominatorTree *DT, LoopInfo *LI,
1004 const TargetTransformInfo *TTI,
1006 SCEVExpander &Rewriter, IVVisitor *V) {
1007 SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
1008 Rewriter, Dead);
1009 SIV.simplifyUsers(CurrIV, V);
1010 return {SIV.hasChanged(), SIV.runUnswitching()};
1011}
1012
1013/// Simplify users of induction variables within this
1014/// loop. This does not actually change or add IVs.
1016 LoopInfo *LI, const TargetTransformInfo *TTI,
1018 SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars");
1019#ifndef NDEBUG
1020 Rewriter.setDebugType(DEBUG_TYPE);
1021#endif
1022 bool Changed = false;
1023 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1024 const auto &[C, _] =
1025 simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
1026 Changed |= C;
1027 }
1028 return Changed;
1029}
1030
1031} // namespace llvm
1032
1033namespace {
1034//===----------------------------------------------------------------------===//
1035// Widen Induction Variables - Extend the width of an IV to cover its
1036// widest uses.
1037//===----------------------------------------------------------------------===//
1038
1039class WidenIV {
1040 // Parameters
1041 PHINode *OrigPhi;
1042 Type *WideType;
1043
1044 // Context
1045 LoopInfo *LI;
1046 Loop *L;
1047 ScalarEvolution *SE;
1048 DominatorTree *DT;
1049
1050 // Does the module have any calls to the llvm.experimental.guard intrinsic
1051 // at all? If not we can avoid scanning instructions looking for guards.
1052 bool HasGuards;
1053
1054 bool UsePostIncrementRanges;
1055
1056 // Statistics
1057 unsigned NumElimExt = 0;
1058 unsigned NumWidened = 0;
1059
1060 // Result
1061 PHINode *WidePhi = nullptr;
1062 Instruction *WideInc = nullptr;
1063 const SCEV *WideIncExpr = nullptr;
1065
1067
1068 enum class ExtendKind { Zero, Sign, Unknown };
1069
1070 // A map tracking the kind of extension used to widen each narrow IV
1071 // and narrow IV user.
1072 // Key: pointer to a narrow IV or IV user.
1073 // Value: the kind of extension used to widen this Instruction.
1074 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
1075
1076 using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
1077
1078 // A map with control-dependent ranges for post increment IV uses. The key is
1079 // a pair of IV def and a use of this def denoting the context. The value is
1080 // a ConstantRange representing possible values of the def at the given
1081 // context.
1082 DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
1083
1084 std::optional<ConstantRange> getPostIncRangeInfo(Value *Def,
1085 Instruction *UseI) {
1086 DefUserPair Key(Def, UseI);
1087 auto It = PostIncRangeInfos.find(Key);
1088 return It == PostIncRangeInfos.end()
1089 ? std::optional<ConstantRange>(std::nullopt)
1090 : std::optional<ConstantRange>(It->second);
1091 }
1092
1093 void calculatePostIncRanges(PHINode *OrigPhi);
1094 void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
1095
1096 void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
1097 DefUserPair Key(Def, UseI);
1098 auto It = PostIncRangeInfos.find(Key);
1099 if (It == PostIncRangeInfos.end())
1100 PostIncRangeInfos.insert({Key, R});
1101 else
1102 It->second = R.intersectWith(It->second);
1103 }
1104
1105public:
1106 /// Record a link in the Narrow IV def-use chain along with the WideIV that
1107 /// computes the same value as the Narrow IV def. This avoids caching Use*
1108 /// pointers.
1109 struct NarrowIVDefUse {
1110 Instruction *NarrowDef = nullptr;
1111 Instruction *NarrowUse = nullptr;
1112 Instruction *WideDef = nullptr;
1113
1114 // True if the narrow def is never negative. Tracking this information lets
1115 // us use a sign extension instead of a zero extension or vice versa, when
1116 // profitable and legal.
1117 bool NeverNegative = false;
1118
1119 NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
1120 bool NeverNegative)
1121 : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
1122 NeverNegative(NeverNegative) {}
1123 };
1124
1125 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1127 bool HasGuards, bool UsePostIncrementRanges = true);
1128
1129 PHINode *createWideIV(SCEVExpander &Rewriter);
1130
1131 unsigned getNumElimExt() { return NumElimExt; };
1132 unsigned getNumWidened() { return NumWidened; };
1133
1134protected:
1135 Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1136 Instruction *Use);
1137
1138 Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1139 Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1140 const SCEVAddRecExpr *WideAR);
1141 Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1142
1143 ExtendKind getExtendKind(Instruction *I);
1144
1145 using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1146
1147 WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1148
1149 WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1150
1151 const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1152 unsigned OpCode) const;
1153
1154 Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter,
1155 PHINode *OrigPhi, PHINode *WidePhi);
1156 void truncateIVUse(NarrowIVDefUse DU);
1157
1158 bool widenLoopCompare(NarrowIVDefUse DU);
1159 bool widenWithVariantUse(NarrowIVDefUse DU);
1160
1161 void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1162
1163private:
1164 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
1165};
1166} // namespace
1167
1168/// Determine the insertion point for this user. By default, insert immediately
1169/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
1170/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
1171/// common dominator for the incoming blocks. A nullptr can be returned if no
1172/// viable location is found: it may happen if User is a PHI and Def only comes
1173/// to this PHI from unreachable blocks.
1175 DominatorTree *DT, LoopInfo *LI) {
1176 PHINode *PHI = dyn_cast<PHINode>(User);
1177 if (!PHI)
1178 return User;
1179
1180 Instruction *InsertPt = nullptr;
1181 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
1182 if (PHI->getIncomingValue(i) != Def)
1183 continue;
1184
1185 BasicBlock *InsertBB = PHI->getIncomingBlock(i);
1186
1187 if (!DT->isReachableFromEntry(InsertBB))
1188 continue;
1189
1190 if (!InsertPt) {
1191 InsertPt = InsertBB->getTerminator();
1192 continue;
1193 }
1194 InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
1195 InsertPt = InsertBB->getTerminator();
1196 }
1197
1198 // If we have skipped all inputs, it means that Def only comes to Phi from
1199 // unreachable blocks.
1200 if (!InsertPt)
1201 return nullptr;
1202
1203 auto *DefI = dyn_cast<Instruction>(Def);
1204 if (!DefI)
1205 return InsertPt;
1206
1207 assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
1208
1209 auto *L = LI->getLoopFor(DefI->getParent());
1210 assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
1211
1212 for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
1213 if (LI->getLoopFor(DTN->getBlock()) == L)
1214 return DTN->getBlock()->getTerminator();
1215
1216 llvm_unreachable("DefI dominates InsertPt!");
1217}
1218
1219WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1221 bool HasGuards, bool UsePostIncrementRanges)
1222 : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1223 L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1224 HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges),
1225 DeadInsts(DI) {
1226 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1227 ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero;
1228}
1229
1230Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1231 bool IsSigned, Instruction *Use) {
1232 // Set the debug location and conservative insertion point.
1233 IRBuilder<> Builder(Use);
1234 // Hoist the insertion point into loop preheaders as far as possible.
1235 for (const Loop *L = LI->getLoopFor(Use->getParent());
1236 L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1237 L = L->getParentLoop())
1238 Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1239
1240 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1241 Builder.CreateZExt(NarrowOper, WideType);
1242}
1243
1244/// Instantiate a wide operation to replace a narrow operation. This only needs
1245/// to handle operations that can evaluation to SCEVAddRec. It can safely return
1246/// 0 for any operation we decide not to clone.
1247Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU,
1248 const SCEVAddRecExpr *WideAR) {
1249 unsigned Opcode = DU.NarrowUse->getOpcode();
1250 switch (Opcode) {
1251 default:
1252 return nullptr;
1253 case Instruction::Add:
1254 case Instruction::Mul:
1255 case Instruction::UDiv:
1256 case Instruction::Sub:
1257 return cloneArithmeticIVUser(DU, WideAR);
1258
1259 case Instruction::And:
1260 case Instruction::Or:
1261 case Instruction::Xor:
1262 case Instruction::Shl:
1263 case Instruction::LShr:
1264 case Instruction::AShr:
1265 return cloneBitwiseIVUser(DU);
1266 }
1267}
1268
1269Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) {
1270 Instruction *NarrowUse = DU.NarrowUse;
1271 Instruction *NarrowDef = DU.NarrowDef;
1272 Instruction *WideDef = DU.WideDef;
1273
1274 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1275
1276 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1277 // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1278 // invariant and will be folded or hoisted. If it actually comes from a
1279 // widened IV, it should be removed during a future call to widenIVUse.
1280 bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign;
1281 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1282 ? WideDef
1283 : createExtendInst(NarrowUse->getOperand(0), WideType,
1284 IsSigned, NarrowUse);
1285 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1286 ? WideDef
1287 : createExtendInst(NarrowUse->getOperand(1), WideType,
1288 IsSigned, NarrowUse);
1289
1290 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1291 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1292 NarrowBO->getName());
1293 IRBuilder<> Builder(NarrowUse);
1294 Builder.Insert(WideBO);
1295 WideBO->copyIRFlags(NarrowBO);
1296 return WideBO;
1297}
1298
1299Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU,
1300 const SCEVAddRecExpr *WideAR) {
1301 Instruction *NarrowUse = DU.NarrowUse;
1302 Instruction *NarrowDef = DU.NarrowDef;
1303 Instruction *WideDef = DU.WideDef;
1304
1305 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1306
1307 unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1308
1309 // We're trying to find X such that
1310 //
1311 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1312 //
1313 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1314 // and check using SCEV if any of them are correct.
1315
1316 // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1317 // correct solution to X.
1318 auto GuessNonIVOperand = [&](bool SignExt) {
1319 const SCEV *WideLHS;
1320 const SCEV *WideRHS;
1321
1322 auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1323 if (SignExt)
1324 return SE->getSignExtendExpr(S, Ty);
1325 return SE->getZeroExtendExpr(S, Ty);
1326 };
1327
1328 if (IVOpIdx == 0) {
1329 WideLHS = SE->getSCEV(WideDef);
1330 const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1331 WideRHS = GetExtend(NarrowRHS, WideType);
1332 } else {
1333 const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1334 WideLHS = GetExtend(NarrowLHS, WideType);
1335 WideRHS = SE->getSCEV(WideDef);
1336 }
1337
1338 // WideUse is "WideDef `op.wide` X" as described in the comment.
1339 const SCEV *WideUse =
1340 getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode());
1341
1342 return WideUse == WideAR;
1343 };
1344
1345 bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign;
1346 if (!GuessNonIVOperand(SignExtend)) {
1347 SignExtend = !SignExtend;
1348 if (!GuessNonIVOperand(SignExtend))
1349 return nullptr;
1350 }
1351
1352 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1353 ? WideDef
1354 : createExtendInst(NarrowUse->getOperand(0), WideType,
1355 SignExtend, NarrowUse);
1356 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1357 ? WideDef
1358 : createExtendInst(NarrowUse->getOperand(1), WideType,
1359 SignExtend, NarrowUse);
1360
1361 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1362 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1363 NarrowBO->getName());
1364
1365 IRBuilder<> Builder(NarrowUse);
1366 Builder.Insert(WideBO);
1367 WideBO->copyIRFlags(NarrowBO);
1368 return WideBO;
1369}
1370
1371WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1372 auto It = ExtendKindMap.find(I);
1373 assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1374 return It->second;
1375}
1376
1377const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1378 unsigned OpCode) const {
1379 switch (OpCode) {
1380 case Instruction::Add:
1381 return SE->getAddExpr(LHS, RHS);
1382 case Instruction::Sub:
1383 return SE->getMinusSCEV(LHS, RHS);
1384 case Instruction::Mul:
1385 return SE->getMulExpr(LHS, RHS);
1386 case Instruction::UDiv:
1387 return SE->getUDivExpr(LHS, RHS);
1388 default:
1389 llvm_unreachable("Unsupported opcode.");
1390 };
1391}
1392
1393namespace {
1394
1395// Represents a interesting integer binary operation for
1396// getExtendedOperandRecurrence. This may be a shl that is being treated as a
1397// multiply or a 'or disjoint' that is being treated as 'add nsw nuw'.
1398struct BinaryOp {
1399 unsigned Opcode;
1400 std::array<Value *, 2> Operands;
1401 bool IsNSW = false;
1402 bool IsNUW = false;
1403
1404 explicit BinaryOp(Instruction *Op)
1405 : Opcode(Op->getOpcode()),
1406 Operands({Op->getOperand(0), Op->getOperand(1)}) {
1407 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
1408 IsNSW = OBO->hasNoSignedWrap();
1409 IsNUW = OBO->hasNoUnsignedWrap();
1410 }
1411 }
1412
1413 explicit BinaryOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
1414 bool IsNSW = false, bool IsNUW = false)
1415 : Opcode(Opcode), Operands({LHS, RHS}), IsNSW(IsNSW), IsNUW(IsNUW) {}
1416};
1417
1418} // end anonymous namespace
1419
1420static std::optional<BinaryOp> matchBinaryOp(Instruction *Op) {
1421 switch (Op->getOpcode()) {
1422 case Instruction::Add:
1423 case Instruction::Sub:
1424 case Instruction::Mul:
1425 return BinaryOp(Op);
1426 case Instruction::Or: {
1427 // Convert or disjoint into add nuw nsw.
1428 if (cast<PossiblyDisjointInst>(Op)->isDisjoint())
1429 return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1),
1430 /*IsNSW=*/true, /*IsNUW=*/true);
1431 break;
1432 }
1433 case Instruction::Shl: {
1434 if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
1435 unsigned BitWidth = cast<IntegerType>(SA->getType())->getBitWidth();
1436
1437 // If the shift count is not less than the bitwidth, the result of
1438 // the shift is undefined. Don't try to analyze it, because the
1439 // resolution chosen here may differ from the resolution chosen in
1440 // other parts of the compiler.
1441 if (SA->getValue().ult(BitWidth)) {
1442 // We can safely preserve the nuw flag in all cases. It's also safe to
1443 // turn a nuw nsw shl into a nuw nsw mul. However, nsw in isolation
1444 // requires special handling. It can be preserved as long as we're not
1445 // left shifting by bitwidth - 1.
1446 bool IsNUW = Op->hasNoUnsignedWrap();
1447 bool IsNSW = Op->hasNoSignedWrap() &&
1448 (IsNUW || SA->getValue().ult(BitWidth - 1));
1449
1450 ConstantInt *X =
1451 ConstantInt::get(Op->getContext(),
1452 APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
1453 return BinaryOp(Instruction::Mul, Op->getOperand(0), X, IsNSW, IsNUW);
1454 }
1455 }
1456
1457 break;
1458 }
1459 }
1460
1461 return std::nullopt;
1462}
1463
1464/// No-wrap operations can transfer sign extension of their result to their
1465/// operands. Generate the SCEV value for the widened operation without
1466/// actually modifying the IR yet. If the expression after extending the
1467/// operands is an AddRec for this loop, return the AddRec and the kind of
1468/// extension used.
1469WidenIV::WidenedRecTy
1470WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
1471 auto Op = matchBinaryOp(DU.NarrowUse);
1472 if (!Op)
1473 return {nullptr, ExtendKind::Unknown};
1474
1475 assert((Op->Opcode == Instruction::Add || Op->Opcode == Instruction::Sub ||
1476 Op->Opcode == Instruction::Mul) &&
1477 "Unexpected opcode");
1478
1479 // One operand (NarrowDef) has already been extended to WideDef. Now determine
1480 // if extending the other will lead to a recurrence.
1481 const unsigned ExtendOperIdx = Op->Operands[0] == DU.NarrowDef ? 1 : 0;
1482 assert(Op->Operands[1 - ExtendOperIdx] == DU.NarrowDef && "bad DU");
1483
1484 ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1485 if (!(ExtKind == ExtendKind::Sign && Op->IsNSW) &&
1486 !(ExtKind == ExtendKind::Zero && Op->IsNUW)) {
1487 ExtKind = ExtendKind::Unknown;
1488
1489 // For a non-negative NarrowDef, we can choose either type of
1490 // extension. We want to use the current extend kind if legal
1491 // (see above), and we only hit this code if we need to check
1492 // the opposite case.
1493 if (DU.NeverNegative) {
1494 if (Op->IsNSW) {
1495 ExtKind = ExtendKind::Sign;
1496 } else if (Op->IsNUW) {
1497 ExtKind = ExtendKind::Zero;
1498 }
1499 }
1500 }
1501
1502 const SCEV *ExtendOperExpr = SE->getSCEV(Op->Operands[ExtendOperIdx]);
1503 if (ExtKind == ExtendKind::Sign)
1504 ExtendOperExpr = SE->getSignExtendExpr(ExtendOperExpr, WideType);
1505 else if (ExtKind == ExtendKind::Zero)
1506 ExtendOperExpr = SE->getZeroExtendExpr(ExtendOperExpr, WideType);
1507 else
1508 return {nullptr, ExtendKind::Unknown};
1509
1510 // When creating this SCEV expr, don't apply the current operations NSW or NUW
1511 // flags. This instruction may be guarded by control flow that the no-wrap
1512 // behavior depends on. Non-control-equivalent instructions can be mapped to
1513 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1514 // semantics to those operations.
1515 const SCEV *lhs = SE->getSCEV(DU.WideDef);
1516 const SCEV *rhs = ExtendOperExpr;
1517
1518 // Let's swap operands to the initial order for the case of non-commutative
1519 // operations, like SUB. See PR21014.
1520 if (ExtendOperIdx == 0)
1521 std::swap(lhs, rhs);
1522 const SCEVAddRecExpr *AddRec =
1523 dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, Op->Opcode));
1524
1525 if (!AddRec || AddRec->getLoop() != L)
1526 return {nullptr, ExtendKind::Unknown};
1527
1528 return {AddRec, ExtKind};
1529}
1530
1531/// Is this instruction potentially interesting for further simplification after
1532/// widening it's type? In other words, can the extend be safely hoisted out of
1533/// the loop with SCEV reducing the value to a recurrence on the same loop. If
1534/// so, return the extended recurrence and the kind of extension used. Otherwise
1535/// return {nullptr, ExtendKind::Unknown}.
1536WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
1537 if (!DU.NarrowUse->getType()->isIntegerTy())
1538 return {nullptr, ExtendKind::Unknown};
1539
1540 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1541 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1542 SE->getTypeSizeInBits(WideType)) {
1543 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1544 // index. So don't follow this use.
1545 return {nullptr, ExtendKind::Unknown};
1546 }
1547
1548 const SCEV *WideExpr;
1549 ExtendKind ExtKind;
1550 if (DU.NeverNegative) {
1551 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1552 if (isa<SCEVAddRecExpr>(WideExpr))
1553 ExtKind = ExtendKind::Sign;
1554 else {
1555 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1556 ExtKind = ExtendKind::Zero;
1557 }
1558 } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) {
1559 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1560 ExtKind = ExtendKind::Sign;
1561 } else {
1562 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1563 ExtKind = ExtendKind::Zero;
1564 }
1565 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1566 if (!AddRec || AddRec->getLoop() != L)
1567 return {nullptr, ExtendKind::Unknown};
1568 return {AddRec, ExtKind};
1569}
1570
1571/// This IV user cannot be widened. Replace this use of the original narrow IV
1572/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1573void WidenIV::truncateIVUse(NarrowIVDefUse DU) {
1574 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1575 if (!InsertPt)
1576 return;
1577 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1578 << *DU.NarrowUse << "\n");
1579 ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1580 IRBuilder<> Builder(InsertPt);
1581 Value *Trunc =
1582 Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType(), "",
1583 DU.NeverNegative || ExtKind == ExtendKind::Zero,
1584 DU.NeverNegative || ExtKind == ExtendKind::Sign);
1585 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1586}
1587
1588/// If the narrow use is a compare instruction, then widen the compare
1589// (and possibly the other operand). The extend operation is hoisted into the
1590// loop preheader as far as possible.
1591bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
1592 ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1593 if (!Cmp)
1594 return false;
1595
1596 // We can legally widen the comparison in the following two cases:
1597 //
1598 // - The signedness of the IV extension and comparison match
1599 //
1600 // - The narrow IV is always positive (and thus its sign extension is equal
1601 // to its zero extension). For instance, let's say we're zero extending
1602 // %narrow for the following use
1603 //
1604 // icmp slt i32 %narrow, %val ... (A)
1605 //
1606 // and %narrow is always positive. Then
1607 //
1608 // (A) == icmp slt i32 sext(%narrow), sext(%val)
1609 // == icmp slt i32 zext(%narrow), sext(%val)
1610 bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
1611 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1612 return false;
1613
1614 Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1615 unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1616 unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1617 assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1618
1619 // Widen the compare instruction.
1620 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1621
1622 // Widen the other operand of the compare, if necessary.
1623 if (CastWidth < IVWidth) {
1624 Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1625 DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1626 }
1627 return true;
1628}
1629
1630// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
1631// will not work when:
1632// 1) SCEV traces back to an instruction inside the loop that SCEV can not
1633// expand, eg. add %indvar, (load %addr)
1634// 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
1635// While SCEV fails to avoid trunc, we can still try to use instruction
1636// combining approach to prove trunc is not required. This can be further
1637// extended with other instruction combining checks, but for now we handle the
1638// following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext")
1639//
1640// Src:
1641// %c = sub nsw %b, %indvar
1642// %d = sext %c to i64
1643// Dst:
1644// %indvar.ext1 = sext %indvar to i64
1645// %m = sext %b to i64
1646// %d = sub nsw i64 %m, %indvar.ext1
1647// Therefore, as long as the result of add/sub/mul is extended to wide type, no
1648// trunc is required regardless of how %b is generated. This pattern is common
1649// when calculating address in 64 bit architecture
1650bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
1651 Instruction *NarrowUse = DU.NarrowUse;
1652 Instruction *NarrowDef = DU.NarrowDef;
1653 Instruction *WideDef = DU.WideDef;
1654
1655 // Handle the common case of add<nsw/nuw>
1656 const unsigned OpCode = NarrowUse->getOpcode();
1657 // Only Add/Sub/Mul instructions are supported.
1658 if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1659 OpCode != Instruction::Mul)
1660 return false;
1661
1662 // The operand that is not defined by NarrowDef of DU. Let's call it the
1663 // other operand.
1664 assert((NarrowUse->getOperand(0) == NarrowDef ||
1665 NarrowUse->getOperand(1) == NarrowDef) &&
1666 "bad DU");
1667
1668 const OverflowingBinaryOperator *OBO =
1669 cast<OverflowingBinaryOperator>(NarrowUse);
1670 ExtendKind ExtKind = getExtendKind(NarrowDef);
1671 bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap();
1672 bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap();
1673 auto AnotherOpExtKind = ExtKind;
1674
1675 // Check that all uses are either:
1676 // - narrow def (in case of we are widening the IV increment);
1677 // - single-input LCSSA Phis;
1678 // - comparison of the chosen type;
1679 // - extend of the chosen type (raison d'etre).
1681 SmallVector<PHINode *, 4> LCSSAPhiUsers;
1683 for (Use &U : NarrowUse->uses()) {
1684 Instruction *User = cast<Instruction>(U.getUser());
1685 if (User == NarrowDef)
1686 continue;
1687 if (!L->contains(User)) {
1688 auto *LCSSAPhi = cast<PHINode>(User);
1689 // Make sure there is only 1 input, so that we don't have to split
1690 // critical edges.
1691 if (LCSSAPhi->getNumOperands() != 1)
1692 return false;
1693 LCSSAPhiUsers.push_back(LCSSAPhi);
1694 continue;
1695 }
1696 if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
1697 auto Pred = ICmp->getPredicate();
1698 // We have 3 types of predicates: signed, unsigned and equality
1699 // predicates. For equality, it's legal to widen icmp for either sign and
1700 // zero extend. For sign extend, we can also do so for signed predicates,
1701 // likeweise for zero extend we can widen icmp for unsigned predicates.
1702 if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred))
1703 return false;
1704 if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred))
1705 return false;
1706 ICmpUsers.push_back(ICmp);
1707 continue;
1708 }
1709 if (ExtKind == ExtendKind::Sign)
1710 User = dyn_cast<SExtInst>(User);
1711 else
1712 User = dyn_cast<ZExtInst>(User);
1713 if (!User || User->getType() != WideType)
1714 return false;
1715 ExtUsers.push_back(User);
1716 }
1717 if (ExtUsers.empty()) {
1718 DeadInsts.emplace_back(NarrowUse);
1719 return true;
1720 }
1721
1722 // We'll prove some facts that should be true in the context of ext users. If
1723 // there is no users, we are done now. If there are some, pick their common
1724 // dominator as context.
1725 const Instruction *CtxI = findCommonDominator(ExtUsers, *DT);
1726
1727 if (!CanSignExtend && !CanZeroExtend) {
1728 // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
1729 // will most likely not see it. Let's try to prove it.
1730 if (OpCode != Instruction::Add)
1731 return false;
1732 if (ExtKind != ExtendKind::Zero)
1733 return false;
1734 const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
1735 const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
1736 // TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
1737 if (NarrowUse->getOperand(0) != NarrowDef)
1738 return false;
1739 if (!SE->isKnownNegative(RHS))
1740 return false;
1741 bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS,
1742 SE->getNegativeSCEV(RHS), CtxI);
1743 if (!ProvedSubNUW)
1744 return false;
1745 // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
1746 // neg(zext(neg(op))), which is basically sext(op).
1747 AnotherOpExtKind = ExtendKind::Sign;
1748 }
1749
1750 // Verifying that Defining operand is an AddRec
1751 const SCEV *Op1 = SE->getSCEV(WideDef);
1752 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1753 if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1754 return false;
1755
1756 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1757
1758 // Generating a widening use instruction.
1759 Value *LHS =
1760 (NarrowUse->getOperand(0) == NarrowDef)
1761 ? WideDef
1762 : createExtendInst(NarrowUse->getOperand(0), WideType,
1763 AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
1764 Value *RHS =
1765 (NarrowUse->getOperand(1) == NarrowDef)
1766 ? WideDef
1767 : createExtendInst(NarrowUse->getOperand(1), WideType,
1768 AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
1769
1770 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1771 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1772 NarrowBO->getName());
1773 IRBuilder<> Builder(NarrowUse);
1774 Builder.Insert(WideBO);
1775 WideBO->copyIRFlags(NarrowBO);
1776 ExtendKindMap[NarrowUse] = ExtKind;
1777
1778 for (Instruction *User : ExtUsers) {
1779 assert(User->getType() == WideType && "Checked before!");
1780 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1781 << *WideBO << "\n");
1782 ++NumElimExt;
1783 User->replaceAllUsesWith(WideBO);
1784 DeadInsts.emplace_back(User);
1785 }
1786
1787 for (PHINode *User : LCSSAPhiUsers) {
1788 assert(User->getNumOperands() == 1 && "Checked before!");
1789 Builder.SetInsertPoint(User);
1790 auto *WidePN =
1791 Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide");
1792 BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor();
1793 assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
1794 "Not a LCSSA Phi?");
1795 WidePN->addIncoming(WideBO, LoopExitingBlock);
1796 Builder.SetInsertPoint(User->getParent(),
1797 User->getParent()->getFirstInsertionPt());
1798 auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
1799 User->replaceAllUsesWith(TruncPN);
1800 DeadInsts.emplace_back(User);
1801 }
1802
1803 for (ICmpInst *User : ICmpUsers) {
1804 Builder.SetInsertPoint(User);
1805 auto ExtendedOp = [&](Value * V)->Value * {
1806 if (V == NarrowUse)
1807 return WideBO;
1808 if (ExtKind == ExtendKind::Zero)
1809 return Builder.CreateZExt(V, WideBO->getType());
1810 else
1811 return Builder.CreateSExt(V, WideBO->getType());
1812 };
1813 auto Pred = User->getPredicate();
1814 auto *LHS = ExtendedOp(User->getOperand(0));
1815 auto *RHS = ExtendedOp(User->getOperand(1));
1816 auto *WideCmp =
1817 Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
1818 User->replaceAllUsesWith(WideCmp);
1819 DeadInsts.emplace_back(User);
1820 }
1821
1822 return true;
1823}
1824
1825/// Determine whether an individual user of the narrow IV can be widened. If so,
1826/// return the wide clone of the user.
1827Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU,
1828 SCEVExpander &Rewriter, PHINode *OrigPhi,
1829 PHINode *WidePhi) {
1830 assert(ExtendKindMap.count(DU.NarrowDef) &&
1831 "Should already know the kind of extension used to widen NarrowDef");
1832
1833 // This narrow use can be widened by a sext if it's non-negative or its narrow
1834 // def was widened by a sext. Same for zext.
1835 bool CanWidenBySExt =
1836 DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
1837 bool CanWidenByZExt =
1838 DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero;
1839
1840 // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1841 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1842 if (LI->getLoopFor(UsePhi->getParent()) != L) {
1843 // For LCSSA phis, sink the truncate outside the loop.
1844 // After SimplifyCFG most loop exit targets have a single predecessor.
1845 // Otherwise fall back to a truncate within the loop.
1846 if (UsePhi->getNumOperands() != 1)
1847 truncateIVUse(DU);
1848 else {
1849 // Widening the PHI requires us to insert a trunc. The logical place
1850 // for this trunc is in the same BB as the PHI. This is not possible if
1851 // the BB is terminated by a catchswitch.
1852 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1853 return nullptr;
1854
1855 PHINode *WidePhi =
1856 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1857 UsePhi->getIterator());
1858 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1859 BasicBlock *WidePhiBB = WidePhi->getParent();
1860 IRBuilder<> Builder(WidePhiBB, WidePhiBB->getFirstInsertionPt());
1861 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType(), "",
1862 CanWidenByZExt, CanWidenBySExt);
1863 UsePhi->replaceAllUsesWith(Trunc);
1864 DeadInsts.emplace_back(UsePhi);
1865 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1866 << *WidePhi << "\n");
1867 }
1868 return nullptr;
1869 }
1870 }
1871
1872 // Our raison d'etre! Eliminate sign and zero extension.
1873 if ((match(DU.NarrowUse, m_SExtLike(m_Value())) && CanWidenBySExt) ||
1874 (isa<ZExtInst>(DU.NarrowUse) && CanWidenByZExt)) {
1875 Value *NewDef = DU.WideDef;
1876 if (DU.NarrowUse->getType() != WideType) {
1877 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1878 unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1879 if (CastWidth < IVWidth) {
1880 // The cast isn't as wide as the IV, so insert a Trunc.
1881 IRBuilder<> Builder(DU.NarrowUse);
1882 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType(), "",
1883 CanWidenByZExt, CanWidenBySExt);
1884 }
1885 else {
1886 // A wider extend was hidden behind a narrower one. This may induce
1887 // another round of IV widening in which the intermediate IV becomes
1888 // dead. It should be very rare.
1889 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1890 << " not wide enough to subsume " << *DU.NarrowUse
1891 << "\n");
1892 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1893 NewDef = DU.NarrowUse;
1894 }
1895 }
1896 if (NewDef != DU.NarrowUse) {
1897 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1898 << " replaced by " << *DU.WideDef << "\n");
1899 ++NumElimExt;
1900 DU.NarrowUse->replaceAllUsesWith(NewDef);
1901 DeadInsts.emplace_back(DU.NarrowUse);
1902 }
1903 // Now that the extend is gone, we want to expose it's uses for potential
1904 // further simplification. We don't need to directly inform SimplifyIVUsers
1905 // of the new users, because their parent IV will be processed later as a
1906 // new loop phi. If we preserved IVUsers analysis, we would also want to
1907 // push the uses of WideDef here.
1908
1909 // No further widening is needed. The deceased [sz]ext had done it for us.
1910 return nullptr;
1911 }
1912
1913 auto tryAddRecExpansion = [&]() -> Instruction* {
1914 // Does this user itself evaluate to a recurrence after widening?
1915 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1916 if (!WideAddRec.first)
1917 WideAddRec = getWideRecurrence(DU);
1918 assert((WideAddRec.first == nullptr) ==
1919 (WideAddRec.second == ExtendKind::Unknown));
1920 if (!WideAddRec.first)
1921 return nullptr;
1922
1923 // Reuse the IV increment that SCEVExpander created. Recompute flags, unless
1924 // the flags for both increments agree and it is safe to use the ones from
1925 // the original inc. In that case, the new use of the wide increment won't
1926 // be more poisonous.
1927 bool NeedToRecomputeFlags =
1929 DU.NarrowUse, WideInc) ||
1930 DU.NarrowUse->hasNoUnsignedWrap() != WideInc->hasNoUnsignedWrap() ||
1931 DU.NarrowUse->hasNoSignedWrap() != WideInc->hasNoSignedWrap();
1932 Instruction *WideUse = nullptr;
1933 if (WideAddRec.first == WideIncExpr &&
1934 Rewriter.hoistIVInc(WideInc, DU.NarrowUse, NeedToRecomputeFlags))
1935 WideUse = WideInc;
1936 else {
1937 WideUse = cloneIVUser(DU, WideAddRec.first);
1938 if (!WideUse)
1939 return nullptr;
1940 }
1941 // Evaluation of WideAddRec ensured that the narrow expression could be
1942 // extended outside the loop without overflow. This suggests that the wide use
1943 // evaluates to the same expression as the extended narrow use, but doesn't
1944 // absolutely guarantee it. Hence the following failsafe check. In rare cases
1945 // where it fails, we simply throw away the newly created wide use.
1946 if (WideAddRec.first != SE->getSCEV(WideUse)) {
1947 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1948 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1949 << "\n");
1950 DeadInsts.emplace_back(WideUse);
1951 return nullptr;
1952 };
1953
1954 // if we reached this point then we are going to replace
1955 // DU.NarrowUse with WideUse. Reattach DbgValue then.
1956 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1957
1958 ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1959 // Returning WideUse pushes it on the worklist.
1960 return WideUse;
1961 };
1962
1963 if (auto *I = tryAddRecExpansion())
1964 return I;
1965
1966 // If use is a loop condition, try to promote the condition instead of
1967 // truncating the IV first.
1968 if (widenLoopCompare(DU))
1969 return nullptr;
1970
1971 // We are here about to generate a truncate instruction that may hurt
1972 // performance because the scalar evolution expression computed earlier
1973 // in WideAddRec.first does not indicate a polynomial induction expression.
1974 // In that case, look at the operands of the use instruction to determine
1975 // if we can still widen the use instead of truncating its operand.
1976 if (widenWithVariantUse(DU))
1977 return nullptr;
1978
1979 // This user does not evaluate to a recurrence after widening, so don't
1980 // follow it. Instead insert a Trunc to kill off the original use,
1981 // eventually isolating the original narrow IV so it can be removed.
1982 truncateIVUse(DU);
1983 return nullptr;
1984}
1985
1986/// Add eligible users of NarrowDef to NarrowIVUsers.
1987void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1988 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1989 bool NonNegativeDef =
1990 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1991 SE->getZero(NarrowSCEV->getType()));
1992 for (User *U : NarrowDef->users()) {
1993 Instruction *NarrowUser = cast<Instruction>(U);
1994
1995 // Handle data flow merges and bizarre phi cycles.
1996 if (!Widened.insert(NarrowUser).second)
1997 continue;
1998
1999 bool NonNegativeUse = false;
2000 if (!NonNegativeDef) {
2001 // We might have a control-dependent range information for this context.
2002 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
2003 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
2004 }
2005
2006 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
2007 NonNegativeDef || NonNegativeUse);
2008 }
2009}
2010
2011/// Process a single induction variable. First use the SCEVExpander to create a
2012/// wide induction variable that evaluates to the same recurrence as the
2013/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
2014/// def-use chain. After widenIVUse has processed all interesting IV users, the
2015/// narrow IV will be isolated for removal by DeleteDeadPHIs.
2016///
2017/// It would be simpler to delete uses as they are processed, but we must avoid
2018/// invalidating SCEV expressions.
2019PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
2020 // Is this phi an induction variable?
2021 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
2022 if (!AddRec)
2023 return nullptr;
2024
2025 // Widen the induction variable expression.
2026 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign
2027 ? SE->getSignExtendExpr(AddRec, WideType)
2028 : SE->getZeroExtendExpr(AddRec, WideType);
2029
2030 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
2031 "Expect the new IV expression to preserve its type");
2032
2033 // Can the IV be extended outside the loop without overflow?
2034 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
2035 if (!AddRec || AddRec->getLoop() != L)
2036 return nullptr;
2037
2038 // An AddRec must have loop-invariant operands. Since this AddRec is
2039 // materialized by a loop header phi, the expression cannot have any post-loop
2040 // operands, so they must dominate the loop header.
2041 assert(
2042 SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
2043 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
2044 "Loop header phi recurrence inputs do not dominate the loop");
2045
2046 // Iterate over IV uses (including transitive ones) looking for IV increments
2047 // of the form 'add nsw %iv, <const>'. For each increment and each use of
2048 // the increment calculate control-dependent range information basing on
2049 // dominating conditions inside of the loop (e.g. a range check inside of the
2050 // loop). Calculated ranges are stored in PostIncRangeInfos map.
2051 //
2052 // Control-dependent range information is later used to prove that a narrow
2053 // definition is not negative (see pushNarrowIVUsers). It's difficult to do
2054 // this on demand because when pushNarrowIVUsers needs this information some
2055 // of the dominating conditions might be already widened.
2057 calculatePostIncRanges(OrigPhi);
2058
2059 // The rewriter provides a value for the desired IV expression. This may
2060 // either find an existing phi or materialize a new one. Either way, we
2061 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
2062 // of the phi-SCC dominates the loop entry.
2063 Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
2064 Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
2065 // If the wide phi is not a phi node, for example a cast node, like bitcast,
2066 // inttoptr, ptrtoint, just skip for now.
2067 if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
2068 // if the cast node is an inserted instruction without any user, we should
2069 // remove it to make sure the pass don't touch the function as we can not
2070 // wide the phi.
2071 if (ExpandInst->hasNUses(0) &&
2072 Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
2073 DeadInsts.emplace_back(ExpandInst);
2074 return nullptr;
2075 }
2076
2077 // Remembering the WideIV increment generated by SCEVExpander allows
2078 // widenIVUse to reuse it when widening the narrow IV's increment. We don't
2079 // employ a general reuse mechanism because the call above is the only call to
2080 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
2081 if (BasicBlock *LatchBlock = L->getLoopLatch()) {
2082 WideInc =
2083 dyn_cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
2084 if (WideInc) {
2085 WideIncExpr = SE->getSCEV(WideInc);
2086 // Propagate the debug location associated with the original loop
2087 // increment to the new (widened) increment.
2088 auto *OrigInc =
2089 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
2090
2091 WideInc->setDebugLoc(OrigInc->getDebugLoc());
2092 // We are replacing a narrow IV increment with a wider IV increment. If
2093 // the original (narrow) increment did not wrap, the wider increment one
2094 // should not wrap either. Set the flags to be the union of both wide
2095 // increment and original increment; this ensures we preserve flags SCEV
2096 // could infer for the wider increment. Limit this only to cases where
2097 // both increments directly increment the corresponding PHI nodes and have
2098 // the same opcode. It is not safe to re-use the flags from the original
2099 // increment, if it is more complex and SCEV expansion may have yielded a
2100 // more simplified wider increment.
2102 OrigInc, WideInc) &&
2103 isa<OverflowingBinaryOperator>(OrigInc) &&
2104 isa<OverflowingBinaryOperator>(WideInc)) {
2105 WideInc->setHasNoUnsignedWrap(WideInc->hasNoUnsignedWrap() ||
2106 OrigInc->hasNoUnsignedWrap());
2107 WideInc->setHasNoSignedWrap(WideInc->hasNoSignedWrap() ||
2108 OrigInc->hasNoSignedWrap());
2109 }
2110 }
2111 }
2112
2113 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
2114 ++NumWidened;
2115
2116 // Traverse the def-use chain using a worklist starting at the original IV.
2117 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
2118
2119 Widened.insert(OrigPhi);
2120 pushNarrowIVUsers(OrigPhi, WidePhi);
2121
2122 while (!NarrowIVUsers.empty()) {
2123 WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
2124
2125 // Process a def-use edge. This may replace the use, so don't hold a
2126 // use_iterator across it.
2127 Instruction *WideUse = widenIVUse(DU, Rewriter, OrigPhi, WidePhi);
2128
2129 // Follow all def-use edges from the previous narrow use.
2130 if (WideUse)
2131 pushNarrowIVUsers(DU.NarrowUse, WideUse);
2132
2133 // widenIVUse may have removed the def-use edge.
2134 if (DU.NarrowDef->use_empty())
2135 DeadInsts.emplace_back(DU.NarrowDef);
2136 }
2137
2138 // Attach any debug information to the new PHI.
2139 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
2140
2141 return WidePhi;
2142}
2143
2144/// Calculates control-dependent range for the given def at the given context
2145/// by looking at dominating conditions inside of the loop
2146void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
2147 Instruction *NarrowUser) {
2148 Value *NarrowDefLHS;
2149 const APInt *NarrowDefRHS;
2150 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
2151 m_APInt(NarrowDefRHS))) ||
2152 !NarrowDefRHS->isNonNegative())
2153 return;
2154
2155 auto UpdateRangeFromCondition = [&] (Value *Condition,
2156 bool TrueDest) {
2157 CmpInst::Predicate Pred;
2158 Value *CmpRHS;
2159 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
2160 m_Value(CmpRHS))))
2161 return;
2162
2164 TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
2165
2166 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
2167 auto CmpConstrainedLHSRange =
2169 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
2171
2172 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
2173 };
2174
2175 auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
2176 if (!HasGuards)
2177 return;
2178
2179 for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
2180 Ctx->getParent()->rend())) {
2181 Value *C = nullptr;
2182 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
2183 UpdateRangeFromCondition(C, /*TrueDest=*/true);
2184 }
2185 };
2186
2187 UpdateRangeFromGuards(NarrowUser);
2188
2189 BasicBlock *NarrowUserBB = NarrowUser->getParent();
2190 // If NarrowUserBB is statically unreachable asking dominator queries may
2191 // yield surprising results. (e.g. the block may not have a dom tree node)
2192 if (!DT->isReachableFromEntry(NarrowUserBB))
2193 return;
2194
2195 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
2196 L->contains(DTB->getBlock());
2197 DTB = DTB->getIDom()) {
2198 auto *BB = DTB->getBlock();
2199 auto *TI = BB->getTerminator();
2200 UpdateRangeFromGuards(TI);
2201
2202 auto *BI = dyn_cast<BranchInst>(TI);
2203 if (!BI || !BI->isConditional())
2204 continue;
2205
2206 auto *TrueSuccessor = BI->getSuccessor(0);
2207 auto *FalseSuccessor = BI->getSuccessor(1);
2208
2209 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
2210 return BBE.isSingleEdge() &&
2211 DT->dominates(BBE, NarrowUser->getParent());
2212 };
2213
2214 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
2215 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
2216
2217 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
2218 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
2219 }
2220}
2221
2222/// Calculates PostIncRangeInfos map for the given IV
2223void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
2226 Worklist.push_back(OrigPhi);
2227 Visited.insert(OrigPhi);
2228
2229 while (!Worklist.empty()) {
2230 Instruction *NarrowDef = Worklist.pop_back_val();
2231
2232 for (Use &U : NarrowDef->uses()) {
2233 auto *NarrowUser = cast<Instruction>(U.getUser());
2234
2235 // Don't go looking outside the current loop.
2236 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
2237 if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
2238 continue;
2239
2240 if (!Visited.insert(NarrowUser).second)
2241 continue;
2242
2243 Worklist.push_back(NarrowUser);
2244
2245 calculatePostIncRange(NarrowDef, NarrowUser);
2246 }
2247 }
2248}
2249
2251 LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter,
2253 unsigned &NumElimExt, unsigned &NumWidened,
2254 bool HasGuards, bool UsePostIncrementRanges) {
2255 WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges);
2256 PHINode *WidePHI = Widener.createWideIV(Rewriter);
2257 NumElimExt = Widener.getNumElimExt();
2258 NumWidened = Widener.getNumWidened();
2259 return WidePHI;
2260}
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
Rewrite undef for PHI
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define DEBUG_TYPE
#define _
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
static cl::opt< bool > WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true), cl::desc("Widen the loop induction variables, if possible, so " "overflow checks won't reject flattening"))
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Instruction * GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint)
static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE)
Return true if this instruction generates a simple SCEV expression in terms of that IV.
static Instruction * findCommonDominator(ArrayRef< Instruction * > Instructions, DominatorTree &DT)
Find a point in code which dominates all given instructions.
static void pushIVUsers(Instruction *Def, Loop *L, SmallPtrSet< Instruction *, 16 > &Simplified, SmallVectorImpl< std::pair< Instruction *, Instruction * > > &SimpleIVUsers)
Add all uses of Def to the current IV's worklist.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static std::optional< BinaryOp > matchBinaryOp(Instruction *Op)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
Class for arbitrary precision integers.
Definition: APInt.h:77
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:313
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:218
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1200
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:264
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
Value * getRHS() const
bool isSigned() const
Whether the intrinsic is signed or unsigned.
Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
Value * getLHS() const
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Definition: InstrTypes.h:513
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:601
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:1108
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
bool isSigned() const
Definition: InstrTypes.h:1265
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
bool isUnsigned() const
Definition: InstrTypes.h:1271
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:864
This class represents a range of values.
Definition: ConstantRange.h:47
APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:344
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
Interface for visiting interesting IV users that are recognized but not simplified by this utility.
virtual void anchor()
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition: Operator.h:77
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition: Operator.h:110
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition: Operator.h:104
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1814
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class uses information about analyze scalars to rewrite expressions in canonical form.
static bool canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi, PHINode *WidePhi, Instruction *OrigInc, Instruction *WideInc)
Return true if both increments directly increment the corresponding IV PHI nodes and have the same op...
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
Represents a saturating add/sub intrinsic.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask)
Convenient NoWrapFlags manipulation that hides enum casts and is visible in the ScalarEvolution name ...
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Represents an op.with.overflow intrinsic.
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Key
PAL metadata keys.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:875
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead)
SimplifyLoopIVs - Simplify users of induction variables within this loop.
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2715
std::pair< bool, bool > simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:77
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Collect information about induction variables that are used by sign/zero extend operations.