LLVM 19.0.0git
SimplifyIndVar.cpp
Go to the documentation of this file.
1//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements induction variable simplification. It does
10// not define any actual pass or policy, but provides a single function to
11// simplify a loop's induction variables based on ScalarEvolution.
12//
13//===----------------------------------------------------------------------===//
14
17#include "llvm/ADT/Statistic.h"
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/IRBuilder.h"
25#include "llvm/Support/Debug.h"
30
31using namespace llvm;
32using namespace llvm::PatternMatch;
33
34#define DEBUG_TYPE "indvars"
35
// Pass statistics, reported under -stats. Each counter is bumped by the
// corresponding simplification below.
// NOTE(review): the STATISTIC( openers for NumSimplifiedSDiv and
// NumSimplifiedSRem (original lines 40 and 43) are elided in this rendered
// view of the file.
36STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
37STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
38STATISTIC(NumFoldedUser, "Number of IV users folded into a constant");
39STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
41 NumSimplifiedSDiv,
42 "Number of IV signed division operations converted to unsigned division");
44 NumSimplifiedSRem,
45 "Number of IV signed remainder operations converted to unsigned remainder");
46STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
47
48namespace {
49 /// This is a utility for simplifying induction variables
50 /// based on ScalarEvolution. It is the primary instrument of the
51 /// IndvarSimplify pass, but it may also be directly invoked to cleanup after
52 /// other loop passes that preserve SCEV.
53 class SimplifyIndvar {
// The loop being simplified plus the analyses the simplifications rely on.
// NOTE(review): several member declarations (original lines 56, 58-60) are
// elided in this rendered view; the constructor initializer list below shows
// they include SE, TTI, Rewriter and DeadInsts.
54 Loop *L;
55 LoopInfo *LI;
57 DominatorTree *DT;
61
// Set as soon as any IR mutation is made; reported via hasChanged().
62 bool Changed = false;
// Set when a comparison is rewritten into a loop-invariant form, which may
// open new unswitching opportunities; reported via runUnswitching().
63 bool RunUnswitching = false;
64
65 public:
// NOTE(review): two parameter lines of this constructor (original lines 67
// and 69) are elided in this rendered view.
66 SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
68 SCEVExpander &Rewriter,
70 : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter),
71 DeadInsts(Dead) {
72 assert(LI && "IV simplification requires LoopInfo");
73 }
74
// True if any simplification changed the IR.
75 bool hasChanged() const { return Changed; }
// True if a simplification produced a loop-invariant condition worth
// handing to loop unswitching.
76 bool runUnswitching() const { return RunUnswitching; }
77
78 /// Iteratively perform simplification on a worklist of users of the
79 /// specified induction variable. This is the top-level driver that applies
80 /// all simplifications to users of an IV.
81 void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
82
// Queue all in-loop, not-yet-visited users of Def as (user, def) pairs.
83 void pushIVUsers(Instruction *Def,
85 SmallVectorImpl<std::pair<Instruction *, Instruction *>>
86 &SimpleIVUsers);
87
// Fold IVOperand into UseInst; returns the replacement operand if folded.
88 Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
89
90 bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
91 bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
92 bool replaceFloatIVWithIntegerIV(Instruction *UseInst);
93
94 bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
95 bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
96 bool eliminateTrunc(TruncInst *TI);
97 bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
98 bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
99 void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
100 void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
101 bool IsSigned);
102 void replaceRemWithNumerator(BinaryOperator *Rem);
103 void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
104 void replaceSRemWithURem(BinaryOperator *Rem);
105 bool eliminateSDiv(BinaryOperator *SDiv);
106 bool strengthenBinaryOp(BinaryOperator *BO, Instruction *IVOperand);
107 bool strengthenOverflowingOperation(BinaryOperator *OBO,
108 Instruction *IVOperand);
109 bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand);
110 };
111}
112
113/// Find a point in code which dominates all given instructions. We can safely
114/// assume that, whatever fact we can prove at the found point, this fact is
115/// also true for each of the given instructions.
// NOTE(review): the opening line of this signature (original line 116, which
// per the loop below takes the range `Instructions`) is elided in this
// rendered view.
117 DominatorTree &DT) {
118 Instruction *CommonDom = nullptr;
// Fold over the instructions, narrowing pairwise to the nearest common
// dominator; the accumulator starts as the first instruction itself.
119 for (auto *Insn : Instructions)
120 CommonDom =
121 CommonDom ? DT.findNearestCommonDominator(CommonDom, Insn) : Insn;
// Can only fire if the instruction list was empty.
122 assert(CommonDom && "Common dominator not found?");
123 return CommonDom;
124}
125
126/// Fold an IV operand into its use. This removes increments of an
127/// aligned IV when used by an instruction that ignores the low bits.
128///
129/// IVOperand is guaranteed SCEVable, but UseInst may not be.
130///
131/// Return the operand of IVOperand for this induction variable if IVOperand can
132/// be folded (in case more folding opportunities have been exposed).
133/// Otherwise return null.
134Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
135 Value *IVSrc = nullptr;
// The IV is only ever folded out of operand 0 (the numerator/shifted value).
136 const unsigned OperIdx = 0;
137 const SCEV *FoldedExpr = nullptr;
138 bool MustDropExactFlag = false;
139 switch (UseInst->getOpcode()) {
140 default:
141 return nullptr;
// Only udiv and lshr (both division-like with a constant RHS) are handled.
142 case Instruction::UDiv:
143 case Instruction::LShr:
144 // We're only interested in the case where we know something about
145 // the numerator and have a constant denominator.
146 if (IVOperand != UseInst->getOperand(OperIdx) ||
147 !isa<ConstantInt>(UseInst->getOperand(1)))
148 return nullptr;
149
150 // Attempt to fold a binary operator with constant operand.
151 // e.g. ((I + 1) >> 2) => I >> 2
152 if (!isa<BinaryOperator>(IVOperand)
153 || !isa<ConstantInt>(IVOperand->getOperand(1)))
154 return nullptr;
155
156 IVSrc = IVOperand->getOperand(0);
157 // IVSrc must be the (SCEVable) IV, since the other operand is const.
158 assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
159
160 ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
161 if (UseInst->getOpcode() == Instruction::LShr) {
162 // Get a constant for the divisor. See createSCEV.
// A shift by >= bitwidth is poison; refuse to reason about it.
163 uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
164 if (D->getValue().uge(BitWidth))
165 return nullptr;
166
// Model lshr as an unsigned divide by 2^shift-amount.
167 D = ConstantInt::get(UseInst->getContext(),
168 APInt::getOneBitSet(BitWidth, D->getZExtValue()));
169 }
// FoldedExpr is what UseInst would compute if the IV increment were
// bypassed: IVSrc udiv D.
170 const auto *LHS = SE->getSCEV(IVSrc);
171 const auto *RHS = SE->getSCEV(D);
172 FoldedExpr = SE->getUDivExpr(LHS, RHS);
173 // We might have 'exact' flag set at this point which will no longer be
174 // correct after we make the replacement.
// 'exact' requires LHS to be a multiple of RHS; verify by re-multiplying.
175 if (UseInst->isExact() && LHS != SE->getMulExpr(FoldedExpr, RHS))
176 MustDropExactFlag = true;
177 }
178 // We have something that might fold its operand. Compare SCEVs.
179 if (!SE->isSCEVable(UseInst->getType()))
180 return nullptr;
181
182 // Bypass the operand if SCEV can prove it has no effect.
183 if (SE->getSCEV(UseInst) != FoldedExpr)
184 return nullptr;
185
186 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
187 << " -> " << *UseInst << '\n');
188
// Rewire UseInst to consume the IV directly, skipping the increment.
189 UseInst->setOperand(OperIdx, IVSrc);
190 assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
191
192 if (MustDropExactFlag)
193 UseInst->dropPoisonGeneratingFlags();
194
195 ++NumElimOperand;
196 Changed = true;
// The bypassed increment may now be dead; queue it for RDCE.
197 if (IVOperand->use_empty())
198 DeadInsts.emplace_back(IVOperand);
// Returning IVSrc lets the caller continue folding through the new operand.
199 return IVSrc;
200}
201
202bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
203 Instruction *IVOperand) {
204 auto *Preheader = L->getLoopPreheader();
205 if (!Preheader)
206 return false;
207 unsigned IVOperIdx = 0;
208 ICmpInst::Predicate Pred = ICmp->getPredicate();
209 if (IVOperand != ICmp->getOperand(0)) {
210 // Swapped
211 assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
212 IVOperIdx = 1;
213 Pred = ICmpInst::getSwappedPredicate(Pred);
214 }
215
216 // Get the SCEVs for the ICmp operands (in the specific context of the
217 // current loop)
218 const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
219 const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
220 const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
221 auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L, ICmp);
222 if (!LIP)
223 return false;
224 ICmpInst::Predicate InvariantPredicate = LIP->Pred;
225 const SCEV *InvariantLHS = LIP->LHS;
226 const SCEV *InvariantRHS = LIP->RHS;
227
228 // Do not generate something ridiculous.
229 auto *PHTerm = Preheader->getTerminator();
230 if (Rewriter.isHighCostExpansion({InvariantLHS, InvariantRHS}, L,
231 2 * SCEVCheapExpansionBudget, TTI, PHTerm) ||
232 !Rewriter.isSafeToExpandAt(InvariantLHS, PHTerm) ||
233 !Rewriter.isSafeToExpandAt(InvariantRHS, PHTerm))
234 return false;
235 auto *NewLHS =
236 Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm);
237 auto *NewRHS =
238 Rewriter.expandCodeFor(InvariantRHS, IVOperand->getType(), PHTerm);
239 LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
240 ICmp->setPredicate(InvariantPredicate);
241 ICmp->setOperand(0, NewLHS);
242 ICmp->setOperand(1, NewRHS);
243 RunUnswitching = true;
244 return true;
245}
246
247/// SimplifyIVUsers helper for eliminating useless
248/// comparisons against an induction variable.
249void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
250 Instruction *IVOperand) {
// Normalize so the IV is conceptually on the left; remember the original
// predicate for the unsigned-canonicalization fallback below.
251 unsigned IVOperIdx = 0;
252 ICmpInst::Predicate Pred = ICmp->getPredicate();
253 ICmpInst::Predicate OriginalPred = Pred;
254 if (IVOperand != ICmp->getOperand(0)) {
255 // Swapped
256 assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
257 IVOperIdx = 1;
258 Pred = ICmpInst::getSwappedPredicate(Pred);
259 }
260
261 // Get the SCEVs for the ICmp operands (in the specific context of the
262 // current loop)
263 const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
264 const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
265 const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
266
267 // If the condition is always true or always false in the given context,
268 // replace it with a constant value.
// NOTE(review): the declaration of `Users` (original line 269, presumably a
// SmallVector of the icmp's users) is elided in this rendered view.
270 for (auto *U : ICmp->users())
271 Users.push_back(cast<Instruction>(U));
// Evaluate the predicate at the common dominator of all users: anything
// provable there holds at every use.
272 const Instruction *CtxI = findCommonDominator(Users, *DT);
273 if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) {
274 SE->forgetValue(ICmp);
// NOTE(review): the line replacing the icmp's uses with the evaluated
// constant (original line 275) is elided in this rendered view.
276 DeadInsts.emplace_back(ICmp);
277 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
278 } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
279 // fallthrough to end of function
280 } else if (ICmpInst::isSigned(OriginalPred) &&
281 SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
282 // If we were unable to make anything above, all we can is to canonicalize
283 // the comparison hoping that it will open the doors for other
284 // optimizations. If we find out that we compare two non-negative values,
285 // we turn the instruction's predicate to its unsigned version. Note that
286 // we cannot rely on Pred here unless we check if we have swapped it.
287 assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
288 LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
289 << '\n');
290 ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
291 } else
292 return;
293
// Only reached when one of the branches above simplified something.
294 ++NumElimCmp;
295 Changed = true;
296}
297
298bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
299 // Get the SCEVs for the ICmp operands.
300 auto *N = SE->getSCEV(SDiv->getOperand(0));
301 auto *D = SE->getSCEV(SDiv->getOperand(1));
302
303 // Simplify unnecessary loops away.
304 const Loop *L = LI->getLoopFor(SDiv->getParent());
305 N = SE->getSCEVAtScope(N, L);
306 D = SE->getSCEVAtScope(D, L);
307
308 // Replace sdiv by udiv if both of the operands are non-negative
309 if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
310 auto *UDiv = BinaryOperator::Create(
311 BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
312 SDiv->getName() + ".udiv", SDiv->getIterator());
313 UDiv->setIsExact(SDiv->isExact());
314 SDiv->replaceAllUsesWith(UDiv);
315 LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
316 ++NumSimplifiedSDiv;
317 Changed = true;
318 DeadInsts.push_back(SDiv);
319 return true;
320 }
321
322 return false;
323}
324
325// i %s n -> i %u n if i >= 0 and n >= 0
326void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {
327 auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
328 auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D,
329 Rem->getName() + ".urem", Rem->getIterator());
330 Rem->replaceAllUsesWith(URem);
331 LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');
332 ++NumSimplifiedSRem;
333 Changed = true;
334 DeadInsts.emplace_back(Rem);
335}
336
337// i % n --> i if i is in [0,n).
338void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {
339 Rem->replaceAllUsesWith(Rem->getOperand(0));
340 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
341 ++NumElimRem;
342 Changed = true;
343 DeadInsts.emplace_back(Rem);
344}
345
346// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
347void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
348 auto *T = Rem->getType();
349 auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
350 ICmpInst *ICmp = new ICmpInst(Rem->getIterator(), ICmpInst::ICMP_EQ, N, D);
351 SelectInst *Sel =
352 SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem->getIterator());
353 Rem->replaceAllUsesWith(Sel);
354 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
355 ++NumElimRem;
356 Changed = true;
357 DeadInsts.emplace_back(Rem);
358}
359
360/// SimplifyIVUsers helper for eliminating useless remainder operations
361/// operating on an induction variable or replacing srem by urem.
362void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem,
363 Instruction *IVOperand,
364 bool IsSigned) {
365 auto *NValue = Rem->getOperand(0);
366 auto *DValue = Rem->getOperand(1);
367 // We're only interested in the case where we know something about
368 // the numerator, unless it is a srem, because we want to replace srem by urem
369 // in general.
370 bool UsedAsNumerator = IVOperand == NValue;
371 if (!UsedAsNumerator && !IsSigned)
372 return;
373
374 const SCEV *N = SE->getSCEV(NValue);
375
376 // Simplify unnecessary loops away.
377 const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
378 N = SE->getSCEVAtScope(N, ICmpLoop);
379
380 bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N);
381
382 // Do not proceed if the Numerator may be negative
383 if (!IsNumeratorNonNegative)
384 return;
385
386 const SCEV *D = SE->getSCEV(DValue);
387 D = SE->getSCEVAtScope(D, ICmpLoop);
388
389 if (UsedAsNumerator) {
390 auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
391 if (SE->isKnownPredicate(LT, N, D)) {
392 replaceRemWithNumerator(Rem);
393 return;
394 }
395
396 auto *T = Rem->getType();
397 const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T));
398 if (SE->isKnownPredicate(LT, NLessOne, D)) {
399 replaceRemWithNumeratorOrZero(Rem);
400 return;
401 }
402 }
403
404 // Try to replace SRem with URem, if both N and D are known non-negative.
405 // Since we had already check N, we only need to check D now
406 if (!IsSigned || !SE->isKnownNonNegative(D))
407 return;
408
409 replaceSRemWithURem(Rem);
410}
411
// Replace a with.overflow intrinsic by a plain (nsw/nuw) binary operation
// when SCEV proves the operation cannot overflow; the overflow bit becomes
// a constant false.
412bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
413 const SCEV *LHS = SE->getSCEV(WO->getLHS());
414 const SCEV *RHS = SE->getSCEV(WO->getRHS());
415 if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
416 return false;
417
418 // Proved no overflow, nuke the overflow check and, if possible, the overflow
419 // intrinsic as well.
420
// NOTE(review): the line creating `NewResult` (original line 421, a
// BinaryOperator::Create call taking the arguments below) is elided in this
// rendered view.
422 WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO->getIterator());
423
// Tag the replacement with the appropriate no-wrap flag, since overflow was
// just proven impossible.
424 if (WO->isSigned())
425 NewResult->setHasNoSignedWrap(true);
426 else
427 NewResult->setHasNoUnsignedWrap(true);
428
// NOTE(review): the declaration of `ToDelete` (original line 429, presumably
// a SmallVector of ExtractValueInst*) is elided in this rendered view.
430
// Rewire the extractvalue users: index 1 (overflow bit) becomes false,
// index 0 (the value) becomes the new flagged binop.
431 for (auto *U : WO->users()) {
432 if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
433 if (EVI->getIndices()[0] == 1)
434 EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext()));
435 else {
436 assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
437 EVI->replaceAllUsesWith(NewResult);
438 }
// Deletion is deferred so the users() iteration above stays valid.
439 ToDelete.push_back(EVI);
440 }
441 }
442
443 for (auto *EVI : ToDelete)
444 EVI->eraseFromParent();
445
// The intrinsic itself may still have non-extractvalue users; only erase it
// when it is fully dead.
446 if (WO->use_empty())
447 WO->eraseFromParent();
448
449 Changed = true;
450 return true;
451}
452
// Replace a saturating add/sub intrinsic with a plain (nsw/nuw) binary
// operation when SCEV proves saturation can never occur.
453bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) {
454 const SCEV *LHS = SE->getSCEV(SI->getLHS());
455 const SCEV *RHS = SE->getSCEV(SI->getRHS());
456 if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
457 return false;
458
// NOTE(review): the line creating `BO` (original line 459, a
// BinaryOperator::Create call taking the arguments below) is elided in this
// rendered view.
460 SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI->getIterator());
461 if (SI->isSigned())
462 BO->setHasNoSignedWrap();
463 else
// NOTE(review): the unsigned branch body (original line 464, presumably
// BO->setHasNoUnsignedWrap()) is elided in this rendered view.
465
466 SI->replaceAllUsesWith(BO);
467 DeadInsts.emplace_back(SI);
468 Changed = true;
469 return true;
470}
471
// Remove a trunc of the IV when every user is an icmp against a loop
// invariant that can instead compare the full-width IV against an
// extended invariant.
472bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
473 // It is always legal to replace
474 // icmp <pred> i32 trunc(iv), n
475 // with
476 // icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate.
477 // Or with
478 // icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate.
479 // Or with either of these if pred is an equality predicate.
480 //
481 // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
482 // every comparison which uses trunc, it means that we can replace each of
483 // them with comparison of iv against sext/zext(n). We no longer need trunc
484 // after that.
485 //
486 // TODO: Should we do this if we can widen *some* comparisons, but not all
487 // of them? Sometimes it is enough to enable other optimizations, but the
488 // trunc instruction will stay in the loop.
489 Value *IV = TI->getOperand(0);
490 Type *IVTy = IV->getType();
491 const SCEV *IVSCEV = SE->getSCEV(IV);
492 const SCEV *TISCEV = SE->getSCEV(TI);
493
494 // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
495 // get rid of trunc
496 bool DoesSExtCollapse = false;
497 bool DoesZExtCollapse = false;
498 if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy))
499 DoesSExtCollapse = true;
500 if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy))
501 DoesZExtCollapse = true;
502
503 // If neither sext nor zext does collapse, it is not profitable to do any
504 // transform. Bail.
505 if (!DoesSExtCollapse && !DoesZExtCollapse)
506 return false;
507
508 // Collect users of the trunc that look like comparisons against invariants.
509 // Bail if we find something different.
// NOTE(review): the declaration of `ICmpUsers` (original line 510, presumably
// a SmallVector of ICmpInst*) is elided in this rendered view.
511 for (auto *U : TI->users()) {
512 // We don't care about users in unreachable blocks.
513 if (isa<Instruction>(U) &&
514 !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
515 continue;
516 ICmpInst *ICI = dyn_cast<ICmpInst>(U);
517 if (!ICI) return false;
518 assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
// One side must be the trunc itself and the other loop-invariant.
519 if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
520 !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
521 return false;
522 // If we cannot get rid of trunc, bail.
523 if (ICI->isSigned() && !DoesSExtCollapse)
524 return false;
525 if (ICI->isUnsigned() && !DoesZExtCollapse)
526 return false;
527 // For equality, either signed or unsigned works.
528 ICmpUsers.push_back(ICI);
529 }
530
531 auto CanUseZExt = [&](ICmpInst *ICI) {
532 // Unsigned comparison can be widened as unsigned.
533 if (ICI->isUnsigned())
534 return true;
535 // Is it profitable to do zext?
536 if (!DoesZExtCollapse)
537 return false;
538 // For equality, we can safely zext both parts.
539 if (ICI->isEquality())
540 return true;
541 // Otherwise we can only use zext when comparing two non-negative or two
542 // negative values. But in practice, we will never pass DoesZExtCollapse
543 // check for a negative value, because zext(trunc(x)) is non-negative. So
544 // it only make sense to check for non-negativity here.
545 const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0));
546 const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1));
547 return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2);
548 };
549 // Replace all comparisons against trunc with comparisons against IV.
550 for (auto *ICI : ICmpUsers) {
551 bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
// Op1 is always the loop-invariant side after this selection.
552 auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
553 IRBuilder<> Builder(ICI);
554 Value *Ext = nullptr;
555 // For signed/unsigned predicate, replace the old comparison with comparison
556 // of immediate IV against sext/zext of the invariant argument. If we can
557 // use either sext or zext (i.e. we are dealing with equality predicate),
558 // then prefer zext as a more canonical form.
559 // TODO: If we see a signed comparison which can be turned into unsigned,
560 // we can do it here for canonicalization purposes.
561 ICmpInst::Predicate Pred = ICI->getPredicate();
562 if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
563 if (CanUseZExt(ICI)) {
564 assert(DoesZExtCollapse && "Unprofitable zext?");
565 Ext = Builder.CreateZExt(Op1, IVTy, "zext");
// NOTE(review): one line of this branch (original line 566, presumably
// converting Pred to its unsigned form) is elided in this rendered view.
567 } else {
568 assert(DoesSExtCollapse && "Unprofitable sext?");
569 Ext = Builder.CreateSExt(Op1, IVTy, "sext");
570 assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
571 }
// Hoist the extension out of the loop when possible; the out-param result
// is deliberately ignored.
572 bool Changed;
573 L->makeLoopInvariant(Ext, Changed);
574 (void)Changed;
575 auto *NewCmp = Builder.CreateICmp(Pred, IV, Ext);
576 ICI->replaceAllUsesWith(NewCmp);
577 DeadInsts.emplace_back(ICI);
578 }
579
580 // Trunc no longer needed.
// NOTE(review): one line (original line 581, presumably detaching the trunc's
// remaining uses before queuing it) is elided in this rendered view.
582 DeadInsts.emplace_back(TI);
583 return true;
584}
585
586/// Eliminate an operation that consumes a simple IV and has no observable
587/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
588/// but UseInst may not be.
589bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
590 Instruction *IVOperand) {
591 if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
592 eliminateIVComparison(ICmp, IVOperand);
593 return true;
594 }
595 if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) {
596 bool IsSRem = Bin->getOpcode() == Instruction::SRem;
597 if (IsSRem || Bin->getOpcode() == Instruction::URem) {
598 simplifyIVRemainder(Bin, IVOperand, IsSRem);
599 return true;
600 }
601
602 if (Bin->getOpcode() == Instruction::SDiv)
603 return eliminateSDiv(Bin);
604 }
605
606 if (auto *WO = dyn_cast<WithOverflowInst>(UseInst))
607 if (eliminateOverflowIntrinsic(WO))
608 return true;
609
610 if (auto *SI = dyn_cast<SaturatingInst>(UseInst))
611 if (eliminateSaturatingIntrinsic(SI))
612 return true;
613
614 if (auto *TI = dyn_cast<TruncInst>(UseInst))
615 if (eliminateTrunc(TI))
616 return true;
617
618 if (eliminateIdentitySCEV(UseInst, IVOperand))
619 return true;
620
621 return false;
622}
623
// Pick an insertion point for loop-invariant code: the preheader terminator
// when the loop has a preheader, otherwise the caller-supplied fallback.
// NOTE(review): the signature line of this helper (original line 624, taking
// the loop `L` and the fallback instruction `Hint`) is elided in this
// rendered view.
625 if (auto *BB = L->getLoopPreheader())
626 return BB->getTerminator();
627
628 return Hint;
629}
630
631/// Replace the UseInst with a loop invariant expression if it is safe.
632bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
633 if (!SE->isSCEVable(I->getType()))
634 return false;
635
636 // Get the symbolic expression for this instruction.
637 const SCEV *S = SE->getSCEV(I);
638
639 if (!SE->isLoopInvariant(S, L))
640 return false;
641
642 // Do not generate something ridiculous even if S is loop invariant.
643 if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I))
644 return false;
645
646 auto *IP = GetLoopInvariantInsertPosition(L, I);
647
648 if (!Rewriter.isSafeToExpandAt(S, IP)) {
649 LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I
650 << " with non-speculable loop invariant: " << *S << '\n');
651 return false;
652 }
653
654 auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);
655 bool NeedToEmitLCSSAPhis = false;
656 if (!LI->replacementPreservesLCSSAForm(I, Invariant))
657 NeedToEmitLCSSAPhis = true;
658
659 I->replaceAllUsesWith(Invariant);
660 LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
661 << " with loop invariant: " << *S << '\n');
662
663 if (NeedToEmitLCSSAPhis) {
664 SmallVector<Instruction *, 1> NeedsLCSSAPhis;
665 NeedsLCSSAPhis.push_back(cast<Instruction>(Invariant));
666 formLCSSAForInstructions(NeedsLCSSAPhis, *DT, *LI, SE);
667 LLVM_DEBUG(dbgs() << " INDVARS: Replacement breaks LCSSA form"
668 << " inserting LCSSA Phis" << '\n');
669 }
670 ++NumFoldedUser;
671 Changed = true;
672 DeadInsts.emplace_back(I);
673 return true;
674}
675
676/// Eliminate redundant type cast between integer and float.
677bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
678 if (UseInst->getOpcode() != CastInst::SIToFP &&
679 UseInst->getOpcode() != CastInst::UIToFP)
680 return false;
681
682 Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0));
683 // Get the symbolic expression for this instruction.
684 const SCEV *IV = SE->getSCEV(IVOperand);
685 int MaskBits;
686 if (UseInst->getOpcode() == CastInst::SIToFP)
687 MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits();
688 else
689 MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits();
690 int DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
691 if (MaskBits <= DestNumSigBits) {
692 for (User *U : UseInst->users()) {
693 // Match for fptosi/fptoui of sitofp and with same type.
694 auto *CI = dyn_cast<CastInst>(U);
695 if (!CI)
696 continue;
697
698 CastInst::CastOps Opcode = CI->getOpcode();
699 if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI)
700 continue;
701
702 Value *Conv = nullptr;
703 if (IVOperand->getType() != CI->getType()) {
704 IRBuilder<> Builder(CI);
705 StringRef Name = IVOperand->getName();
706 // To match InstCombine logic, we only need sext if both fptosi and
707 // sitofp are used. If one of them is unsigned, then we can use zext.
708 if (SE->getTypeSizeInBits(IVOperand->getType()) >
709 SE->getTypeSizeInBits(CI->getType())) {
710 Conv = Builder.CreateTrunc(IVOperand, CI->getType(), Name + ".trunc");
711 } else if (Opcode == CastInst::FPToUI ||
712 UseInst->getOpcode() == CastInst::UIToFP) {
713 Conv = Builder.CreateZExt(IVOperand, CI->getType(), Name + ".zext");
714 } else {
715 Conv = Builder.CreateSExt(IVOperand, CI->getType(), Name + ".sext");
716 }
717 } else
718 Conv = IVOperand;
719
720 CI->replaceAllUsesWith(Conv);
721 DeadInsts.push_back(CI);
722 LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
723 << " with: " << *Conv << '\n');
724
725 ++NumFoldedUser;
726 Changed = true;
727 }
728 }
729
730 return Changed;
731}
732
733/// Eliminate any operation that SCEV can prove is an identity function.
734bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
735 Instruction *IVOperand) {
736 if (!SE->isSCEVable(UseInst->getType()) ||
737 UseInst->getType() != IVOperand->getType())
738 return false;
739
740 const SCEV *UseSCEV = SE->getSCEV(UseInst);
741 if (UseSCEV != SE->getSCEV(IVOperand))
742 return false;
743
744 // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
745 // dominator tree, even if X is an operand to Y. For instance, in
746 //
747 // %iv = phi i32 {0,+,1}
748 // br %cond, label %left, label %merge
749 //
750 // left:
751 // %X = add i32 %iv, 0
752 // br label %merge
753 //
754 // merge:
755 // %M = phi (%X, %iv)
756 //
757 // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
758 // %M.replaceAllUsesWith(%X) would be incorrect.
759
760 if (isa<PHINode>(UseInst))
761 // If UseInst is not a PHI node then we know that IVOperand dominates
762 // UseInst directly from the legality of SSA.
763 if (!DT || !DT->dominates(IVOperand, UseInst))
764 return false;
765
766 if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
767 return false;
768
769 // Make sure the operand is not more poisonous than the instruction.
770 if (!impliesPoison(IVOperand, UseInst)) {
771 SmallVector<Instruction *> DropPoisonGeneratingInsts;
772 if (!SE->canReuseInstruction(UseSCEV, IVOperand, DropPoisonGeneratingInsts))
773 return false;
774
775 for (Instruction *I : DropPoisonGeneratingInsts)
776 I->dropPoisonGeneratingAnnotations();
777 }
778
779 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
780
781 SE->forgetValue(UseInst);
782 UseInst->replaceAllUsesWith(IVOperand);
783 ++NumElimIdentity;
784 Changed = true;
785 DeadInsts.emplace_back(UseInst);
786 return true;
787}
788
789bool SimplifyIndvar::strengthenBinaryOp(BinaryOperator *BO,
790 Instruction *IVOperand) {
791 return (isa<OverflowingBinaryOperator>(BO) &&
792 strengthenOverflowingOperation(BO, IVOperand)) ||
793 (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand));
794}
795
796/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
797/// unsigned-overflow. Returns true if anything changed, false otherwise.
798bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
799 Instruction *IVOperand) {
// Ask SCEV which no-wrap flags can be proven for this binop.
800 auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp(
801 cast<OverflowingBinaryOperator>(BO));
802
803 if (!Flags)
804 return false;
805
// NOTE(review): the lines applying the inferred flags to BO (original lines
// 806-809, presumably setHasNoUnsignedWrap/setHasNoSignedWrap calls) are
// elided in this rendered view.
810
811 // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
812 // flags on addrecs while performing zero/sign extensions. We could call
813 // forgetValue() here to make sure those flags also propagate to any other
814 // SCEV expressions based on the addrec. However, this can have pathological
815 // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
816 return true;
817}
818
819/// Annotate the Shr in (X << IVOperand) >> C as exact using the
820/// information from the IV's range. Returns true if anything changed, false
821/// otherwise.
822bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
823 Instruction *IVOperand) {
824 if (BO->getOpcode() == Instruction::Shl) {
825 bool Changed = false;
826 ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
827 for (auto *U : BO->users()) {
828 const APInt *C;
829 if (match(U,
830 m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
831 match(U,
832 m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
833 BinaryOperator *Shr = cast<BinaryOperator>(U);
834 if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
835 Shr->setIsExact(true);
836 Changed = true;
837 }
838 }
839 }
840 return Changed;
841 }
842
843 return false;
844}
845
846/// Add all uses of Def to the current IV's worklist.
847void SimplifyIndvar::pushIVUsers(
// NOTE(review): the first parameter line of this definition (original line
// 848, carrying `Def` and the `Simplified` set used below) is elided in this
// rendered view.
849 SmallVectorImpl<std::pair<Instruction *, Instruction *>> &SimpleIVUsers) {
850 for (User *U : Def->users()) {
851 Instruction *UI = cast<Instruction>(U);
852
853 // Avoid infinite or exponential worklist processing.
854 // Also ensure unique worklist users.
855 // If Def is a LoopPhi, it may not be in the Simplified set, so check for
856 // self edges first.
857 if (UI == Def)
858 continue;
859
860 // Only change the current Loop, do not change the other parts (e.g. other
861 // Loops).
862 if (!L->contains(UI))
863 continue;
864
865 // Do not push the same instruction more than once.
866 if (!Simplified.insert(UI).second)
867 continue;
868
// Pair each user with the defining value so the processor knows which
// operand the IV arrived through.
869 SimpleIVUsers.push_back(std::make_pair(UI, Def));
870 }
871}
872
873/// Return true if this instruction generates a simple SCEV
874/// expression in terms of that IV.
875///
876/// This is similar to IVUsers' isInteresting() but processes each instruction
877/// non-recursively when the operand is already known to be a simpleIVUser.
878///
879static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
880 if (!SE->isSCEVable(I->getType()))
881 return false;
882
883 // Get the symbolic expression for this instruction.
884 const SCEV *S = SE->getSCEV(I);
885
886 // Only consider affine recurrences.
887 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
888 if (AR && AR->getLoop() == L)
889 return true;
890
891 return false;
892}
893
894/// Iteratively perform simplification on a worklist of users
895/// of the specified induction variable. Each successive simplification may push
896/// more users which may themselves be candidates for simplification.
897///
898/// This algorithm does not require IVUsers analysis. Instead, it simplifies
899/// instructions in-place during analysis. Rather than rewriting induction
900/// variables bottom-up from their users, it transforms a chain of IVUsers
901/// top-down, updating the IR only when it encounters a clear optimization
902/// opportunity.
903///
904/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
905///
906void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
907 if (!SE->isSCEVable(CurrIV->getType()))
908 return;
909
910 // Instructions processed by SimplifyIndvar for CurrIV.
912
913 // Use-def pairs if IV users waiting to be processed for CurrIV.
915
916 // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
917 // called multiple times for the same LoopPhi. This is the proper thing to
918 // do for loop header phis that use each other.
919 pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
920
921 while (!SimpleIVUsers.empty()) {
922 std::pair<Instruction*, Instruction*> UseOper =
923 SimpleIVUsers.pop_back_val();
924 Instruction *UseInst = UseOper.first;
925
926 // If a user of the IndVar is trivially dead, we prefer just to mark it dead
927 // rather than try to do some complex analysis or transformation (such as
928 // widening) basing on it.
929 // TODO: Propagate TLI and pass it here to handle more cases.
930 if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) {
931 DeadInsts.emplace_back(UseInst);
932 continue;
933 }
934
935 // Bypass back edges to avoid extra work.
936 if (UseInst == CurrIV) continue;
937
938 // Try to replace UseInst with a loop invariant before any other
939 // simplifications.
940 if (replaceIVUserWithLoopInvariant(UseInst))
941 continue;
942
943 // Go further for the bitcast 'prtoint ptr to i64' or if the cast is done
944 // by truncation
945 if ((isa<PtrToIntInst>(UseInst)) || (isa<TruncInst>(UseInst)))
946 for (Use &U : UseInst->uses()) {
947 Instruction *User = cast<Instruction>(U.getUser());
948 if (replaceIVUserWithLoopInvariant(User))
949 break; // done replacing
950 }
951
952 Instruction *IVOperand = UseOper.second;
953 for (unsigned N = 0; IVOperand; ++N) {
954 assert(N <= Simplified.size() && "runaway iteration");
955 (void) N;
956
957 Value *NewOper = foldIVUser(UseInst, IVOperand);
958 if (!NewOper)
959 break; // done folding
960 IVOperand = dyn_cast<Instruction>(NewOper);
961 }
962 if (!IVOperand)
963 continue;
964
965 if (eliminateIVUser(UseInst, IVOperand)) {
966 pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
967 continue;
968 }
969
970 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
971 if (strengthenBinaryOp(BO, IVOperand)) {
972 // re-queue uses of the now modified binary operator and fall
973 // through to the checks that remain.
974 pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
975 }
976 }
977
978 // Try to use integer induction for FPToSI of float induction directly.
979 if (replaceFloatIVWithIntegerIV(UseInst)) {
980 // Re-queue the potentially new direct uses of IVOperand.
981 pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
982 continue;
983 }
984
985 CastInst *Cast = dyn_cast<CastInst>(UseInst);
986 if (V && Cast) {
987 V->visitCast(Cast);
988 continue;
989 }
990 if (isSimpleIVUser(UseInst, L, SE)) {
991 pushIVUsers(UseInst, Simplified, SimpleIVUsers);
992 }
993 }
994}
995
996namespace llvm {
997
999
1000/// Simplify instructions that use this induction variable
1001/// by using ScalarEvolution to analyze the IV's recurrence.
1002/// Returns a pair where the first entry indicates that the function makes
1003/// changes and the second entry indicates that it introduced new opportunities
1004/// for loop unswitching.
1005std::pair<bool, bool> simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE,
1006 DominatorTree *DT, LoopInfo *LI,
1007 const TargetTransformInfo *TTI,
1009 SCEVExpander &Rewriter, IVVisitor *V) {
1010 SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
1011 Rewriter, Dead);
1012 SIV.simplifyUsers(CurrIV, V);
1013 return {SIV.hasChanged(), SIV.runUnswitching()};
1014}
1015
1016/// Simplify users of induction variables within this
1017/// loop. This does not actually change or add IVs.
1019 LoopInfo *LI, const TargetTransformInfo *TTI,
1021 SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars");
1022#ifndef NDEBUG
1023 Rewriter.setDebugType(DEBUG_TYPE);
1024#endif
1025 bool Changed = false;
1026 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1027 const auto &[C, _] =
1028 simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
1029 Changed |= C;
1030 }
1031 return Changed;
1032}
1033
1034} // namespace llvm
1035
1036namespace {
1037//===----------------------------------------------------------------------===//
1038// Widen Induction Variables - Extend the width of an IV to cover its
1039// widest uses.
1040//===----------------------------------------------------------------------===//
1041
1042class WidenIV {
1043 // Parameters
1044 PHINode *OrigPhi;
1045 Type *WideType;
1046
1047 // Context
1048 LoopInfo *LI;
1049 Loop *L;
1050 ScalarEvolution *SE;
1051 DominatorTree *DT;
1052
1053 // Does the module have any calls to the llvm.experimental.guard intrinsic
1054 // at all? If not we can avoid scanning instructions looking for guards.
1055 bool HasGuards;
1056
1057 bool UsePostIncrementRanges;
1058
1059 // Statistics
1060 unsigned NumElimExt = 0;
1061 unsigned NumWidened = 0;
1062
1063 // Result
1064 PHINode *WidePhi = nullptr;
1065 Instruction *WideInc = nullptr;
1066 const SCEV *WideIncExpr = nullptr;
1068
1070
1071 enum class ExtendKind { Zero, Sign, Unknown };
1072
1073 // A map tracking the kind of extension used to widen each narrow IV
1074 // and narrow IV user.
1075 // Key: pointer to a narrow IV or IV user.
1076 // Value: the kind of extension used to widen this Instruction.
1077 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
1078
1079 using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
1080
1081 // A map with control-dependent ranges for post increment IV uses. The key is
1082 // a pair of IV def and a use of this def denoting the context. The value is
1083 // a ConstantRange representing possible values of the def at the given
1084 // context.
1085 DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
1086
1087 std::optional<ConstantRange> getPostIncRangeInfo(Value *Def,
1088 Instruction *UseI) {
1089 DefUserPair Key(Def, UseI);
1090 auto It = PostIncRangeInfos.find(Key);
1091 return It == PostIncRangeInfos.end()
1092 ? std::optional<ConstantRange>(std::nullopt)
1093 : std::optional<ConstantRange>(It->second);
1094 }
1095
1096 void calculatePostIncRanges(PHINode *OrigPhi);
1097 void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
1098
1099 void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
1100 DefUserPair Key(Def, UseI);
1101 auto It = PostIncRangeInfos.find(Key);
1102 if (It == PostIncRangeInfos.end())
1103 PostIncRangeInfos.insert({Key, R});
1104 else
1105 It->second = R.intersectWith(It->second);
1106 }
1107
1108public:
1109 /// Record a link in the Narrow IV def-use chain along with the WideIV that
1110 /// computes the same value as the Narrow IV def. This avoids caching Use*
1111 /// pointers.
1112 struct NarrowIVDefUse {
1113 Instruction *NarrowDef = nullptr;
1114 Instruction *NarrowUse = nullptr;
1115 Instruction *WideDef = nullptr;
1116
1117 // True if the narrow def is never negative. Tracking this information lets
1118 // us use a sign extension instead of a zero extension or vice versa, when
1119 // profitable and legal.
1120 bool NeverNegative = false;
1121
1122 NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
1123 bool NeverNegative)
1124 : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
1125 NeverNegative(NeverNegative) {}
1126 };
1127
1128 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1130 bool HasGuards, bool UsePostIncrementRanges = true);
1131
1132 PHINode *createWideIV(SCEVExpander &Rewriter);
1133
1134 unsigned getNumElimExt() { return NumElimExt; };
1135 unsigned getNumWidened() { return NumWidened; };
1136
1137protected:
1138 Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1139 Instruction *Use);
1140
1141 Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1142 Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1143 const SCEVAddRecExpr *WideAR);
1144 Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1145
1146 ExtendKind getExtendKind(Instruction *I);
1147
1148 using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1149
1150 WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1151
1152 WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1153
1154 const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1155 unsigned OpCode) const;
1156
1157 Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter,
1158 PHINode *OrigPhi, PHINode *WidePhi);
1159 void truncateIVUse(NarrowIVDefUse DU);
1160
1161 bool widenLoopCompare(NarrowIVDefUse DU);
1162 bool widenWithVariantUse(NarrowIVDefUse DU);
1163
1164 void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1165
1166private:
1167 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
1168};
1169} // namespace
1170
1171/// Determine the insertion point for this user. By default, insert immediately
1172/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
1173/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
1174/// common dominator for the incoming blocks. A nullptr can be returned if no
1175/// viable location is found: it may happen if User is a PHI and Def only comes
1176/// to this PHI from unreachable blocks.
1178 DominatorTree *DT, LoopInfo *LI) {
1179 PHINode *PHI = dyn_cast<PHINode>(User);
1180 if (!PHI)
1181 return User;
1182
1183 Instruction *InsertPt = nullptr;
1184 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
1185 if (PHI->getIncomingValue(i) != Def)
1186 continue;
1187
1188 BasicBlock *InsertBB = PHI->getIncomingBlock(i);
1189
1190 if (!DT->isReachableFromEntry(InsertBB))
1191 continue;
1192
1193 if (!InsertPt) {
1194 InsertPt = InsertBB->getTerminator();
1195 continue;
1196 }
1197 InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
1198 InsertPt = InsertBB->getTerminator();
1199 }
1200
1201 // If we have skipped all inputs, it means that Def only comes to Phi from
1202 // unreachable blocks.
1203 if (!InsertPt)
1204 return nullptr;
1205
1206 auto *DefI = dyn_cast<Instruction>(Def);
1207 if (!DefI)
1208 return InsertPt;
1209
1210 assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
1211
1212 auto *L = LI->getLoopFor(DefI->getParent());
1213 assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
1214
1215 for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
1216 if (LI->getLoopFor(DTN->getBlock()) == L)
1217 return DTN->getBlock()->getTerminator();
1218
1219 llvm_unreachable("DefI dominates InsertPt!");
1220}
1221
1222WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1224 bool HasGuards, bool UsePostIncrementRanges)
1225 : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1226 L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1227 HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges),
1228 DeadInsts(DI) {
1229 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1230 ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero;
1231}
1232
1233Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1234 bool IsSigned, Instruction *Use) {
1235 // Set the debug location and conservative insertion point.
1236 IRBuilder<> Builder(Use);
1237 // Hoist the insertion point into loop preheaders as far as possible.
1238 for (const Loop *L = LI->getLoopFor(Use->getParent());
1239 L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1240 L = L->getParentLoop())
1241 Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1242
1243 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1244 Builder.CreateZExt(NarrowOper, WideType);
1245}
1246
1247/// Instantiate a wide operation to replace a narrow operation. This only needs
1248/// to handle operations that can evaluation to SCEVAddRec. It can safely return
1249/// 0 for any operation we decide not to clone.
1250Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU,
1251 const SCEVAddRecExpr *WideAR) {
1252 unsigned Opcode = DU.NarrowUse->getOpcode();
1253 switch (Opcode) {
1254 default:
1255 return nullptr;
1256 case Instruction::Add:
1257 case Instruction::Mul:
1258 case Instruction::UDiv:
1259 case Instruction::Sub:
1260 return cloneArithmeticIVUser(DU, WideAR);
1261
1262 case Instruction::And:
1263 case Instruction::Or:
1264 case Instruction::Xor:
1265 case Instruction::Shl:
1266 case Instruction::LShr:
1267 case Instruction::AShr:
1268 return cloneBitwiseIVUser(DU);
1269 }
1270}
1271
1272Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) {
1273 Instruction *NarrowUse = DU.NarrowUse;
1274 Instruction *NarrowDef = DU.NarrowDef;
1275 Instruction *WideDef = DU.WideDef;
1276
1277 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1278
1279 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1280 // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1281 // invariant and will be folded or hoisted. If it actually comes from a
1282 // widened IV, it should be removed during a future call to widenIVUse.
1283 bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign;
1284 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1285 ? WideDef
1286 : createExtendInst(NarrowUse->getOperand(0), WideType,
1287 IsSigned, NarrowUse);
1288 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1289 ? WideDef
1290 : createExtendInst(NarrowUse->getOperand(1), WideType,
1291 IsSigned, NarrowUse);
1292
1293 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1294 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1295 NarrowBO->getName());
1296 IRBuilder<> Builder(NarrowUse);
1297 Builder.Insert(WideBO);
1298 WideBO->copyIRFlags(NarrowBO);
1299 return WideBO;
1300}
1301
/// Clone a narrow arithmetic operation (add/sub/mul/udiv) as a wide one,
/// using SCEV to determine which extension of the non-IV operand makes the
/// wide clone equal to WideAR. Returns nullptr if neither sext nor zext of
/// the non-IV operand reproduces WideAR.
Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Which operand of NarrowUse is the IV def (the other one is "X" below).
  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse =
        getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode());

    return WideUse == WideAR;
  };

  // Try the extension kind already chosen for NarrowDef first; fall back to
  // the opposite kind before giving up.
  bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  // Materialize the wide operands: the IV operand is already widened, the
  // other operand gets the extension kind that SCEV proved correct above.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}
1373
1374WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1375 auto It = ExtendKindMap.find(I);
1376 assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1377 return It->second;
1378}
1379
1380const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1381 unsigned OpCode) const {
1382 switch (OpCode) {
1383 case Instruction::Add:
1384 return SE->getAddExpr(LHS, RHS);
1385 case Instruction::Sub:
1386 return SE->getMinusSCEV(LHS, RHS);
1387 case Instruction::Mul:
1388 return SE->getMulExpr(LHS, RHS);
1389 case Instruction::UDiv:
1390 return SE->getUDivExpr(LHS, RHS);
1391 default:
1392 llvm_unreachable("Unsupported opcode.");
1393 };
1394}
1395
1396namespace {
1397
1398// Represents a interesting integer binary operation for
1399// getExtendedOperandRecurrence. This may be a shl that is being treated as a
1400// multiply or a 'or disjoint' that is being treated as 'add nsw nuw'.
1401struct BinaryOp {
1402 unsigned Opcode;
1403 std::array<Value *, 2> Operands;
1404 bool IsNSW = false;
1405 bool IsNUW = false;
1406
1407 explicit BinaryOp(Instruction *Op)
1408 : Opcode(Op->getOpcode()),
1409 Operands({Op->getOperand(0), Op->getOperand(1)}) {
1410 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
1411 IsNSW = OBO->hasNoSignedWrap();
1412 IsNUW = OBO->hasNoUnsignedWrap();
1413 }
1414 }
1415
1416 explicit BinaryOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
1417 bool IsNSW = false, bool IsNUW = false)
1418 : Opcode(Opcode), Operands({LHS, RHS}), IsNSW(IsNSW), IsNUW(IsNUW) {}
1419};
1420
1421} // end anonymous namespace
1422
1423static std::optional<BinaryOp> matchBinaryOp(Instruction *Op) {
1424 switch (Op->getOpcode()) {
1425 case Instruction::Add:
1426 case Instruction::Sub:
1427 case Instruction::Mul:
1428 return BinaryOp(Op);
1429 case Instruction::Or: {
1430 // Convert or disjoint into add nuw nsw.
1431 if (cast<PossiblyDisjointInst>(Op)->isDisjoint())
1432 return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1),
1433 /*IsNSW=*/true, /*IsNUW=*/true);
1434 break;
1435 }
1436 case Instruction::Shl: {
1437 if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
1438 unsigned BitWidth = cast<IntegerType>(SA->getType())->getBitWidth();
1439
1440 // If the shift count is not less than the bitwidth, the result of
1441 // the shift is undefined. Don't try to analyze it, because the
1442 // resolution chosen here may differ from the resolution chosen in
1443 // other parts of the compiler.
1444 if (SA->getValue().ult(BitWidth)) {
1445 // We can safely preserve the nuw flag in all cases. It's also safe to
1446 // turn a nuw nsw shl into a nuw nsw mul. However, nsw in isolation
1447 // requires special handling. It can be preserved as long as we're not
1448 // left shifting by bitwidth - 1.
1449 bool IsNUW = Op->hasNoUnsignedWrap();
1450 bool IsNSW = Op->hasNoSignedWrap() &&
1451 (IsNUW || SA->getValue().ult(BitWidth - 1));
1452
1453 ConstantInt *X =
1454 ConstantInt::get(Op->getContext(),
1455 APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
1456 return BinaryOp(Instruction::Mul, Op->getOperand(0), X, IsNSW, IsNUW);
1457 }
1458 }
1459
1460 break;
1461 }
1462 }
1463
1464 return std::nullopt;
1465}
1466
/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy
WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
  // Only add/sub/mul (possibly normalized from 'or disjoint' or shl) can
  // transfer an extension this way.
  auto Op = matchBinaryOp(DU.NarrowUse);
  if (!Op)
    return {nullptr, ExtendKind::Unknown};

  assert((Op->Opcode == Instruction::Add || Op->Opcode == Instruction::Sub ||
          Op->Opcode == Instruction::Mul) &&
         "Unexpected opcode");

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx = Op->Operands[0] == DU.NarrowDef ? 1 : 0;
  assert(Op->Operands[1 - ExtendOperIdx] == DU.NarrowDef && "bad DU");

  // The extension must be justified by a matching no-wrap flag on the
  // operation (sext needs nsw, zext needs nuw).
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (!(ExtKind == ExtendKind::Sign && Op->IsNSW) &&
      !(ExtKind == ExtendKind::Zero && Op->IsNUW)) {
    ExtKind = ExtendKind::Unknown;

    // For a non-negative NarrowDef, we can choose either type of
    // extension. We want to use the current extend kind if legal
    // (see above), and we only hit this code if we need to check
    // the opposite case.
    if (DU.NeverNegative) {
      if (Op->IsNSW) {
        ExtKind = ExtendKind::Sign;
      } else if (Op->IsNUW) {
        ExtKind = ExtendKind::Zero;
      }
    }
  }

  const SCEV *ExtendOperExpr = SE->getSCEV(Op->Operands[ExtendOperIdx]);
  if (ExtKind == ExtendKind::Sign)
    ExtendOperExpr = SE->getSignExtendExpr(ExtendOperExpr, WideType);
  else if (ExtKind == ExtendKind::Zero)
    ExtendOperExpr = SE->getZeroExtendExpr(ExtendOperExpr, WideType);
  else
    return {nullptr, ExtendKind::Unknown};

  // When creating this SCEV expr, don't apply the current operations NSW or NUW
  // flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, Op->Opcode));

  // Only an affine recurrence on this loop is usable for widening.
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, ExtendKind::Unknown};

  return {AddRec, ExtKind};
}
1533
1534/// Is this instruction potentially interesting for further simplification after
1535/// widening it's type? In other words, can the extend be safely hoisted out of
1536/// the loop with SCEV reducing the value to a recurrence on the same loop. If
1537/// so, return the extended recurrence and the kind of extension used. Otherwise
1538/// return {nullptr, ExtendKind::Unknown}.
1539WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
1540 if (!DU.NarrowUse->getType()->isIntegerTy())
1541 return {nullptr, ExtendKind::Unknown};
1542
1543 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1544 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1545 SE->getTypeSizeInBits(WideType)) {
1546 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1547 // index. So don't follow this use.
1548 return {nullptr, ExtendKind::Unknown};
1549 }
1550
1551 const SCEV *WideExpr;
1552 ExtendKind ExtKind;
1553 if (DU.NeverNegative) {
1554 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1555 if (isa<SCEVAddRecExpr>(WideExpr))
1556 ExtKind = ExtendKind::Sign;
1557 else {
1558 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1559 ExtKind = ExtendKind::Zero;
1560 }
1561 } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) {
1562 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1563 ExtKind = ExtendKind::Sign;
1564 } else {
1565 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1566 ExtKind = ExtendKind::Zero;
1567 }
1568 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1569 if (!AddRec || AddRec->getLoop() != L)
1570 return {nullptr, ExtendKind::Unknown};
1571 return {AddRec, ExtKind};
1572}
1573
1574/// This IV user cannot be widened. Replace this use of the original narrow IV
1575/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1576void WidenIV::truncateIVUse(NarrowIVDefUse DU) {
1577 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1578 if (!InsertPt)
1579 return;
1580 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1581 << *DU.NarrowUse << "\n");
1582 ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1583 IRBuilder<> Builder(InsertPt);
1584 Value *Trunc =
1585 Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType(), "",
1586 DU.NeverNegative || ExtKind == ExtendKind::Zero,
1587 DU.NeverNegative || ExtKind == ExtendKind::Sign);
1588 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1589}
1590
1591/// If the narrow use is a compare instruction, then widen the compare
1592// (and possibly the other operand). The extend operation is hoisted into the
1593// loop preheader as far as possible.
1594bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
1595 ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1596 if (!Cmp)
1597 return false;
1598
1599 // We can legally widen the comparison in the following two cases:
1600 //
1601 // - The signedness of the IV extension and comparison match
1602 //
1603 // - The narrow IV is always positive (and thus its sign extension is equal
1604 // to its zero extension). For instance, let's say we're zero extending
1605 // %narrow for the following use
1606 //
1607 // icmp slt i32 %narrow, %val ... (A)
1608 //
1609 // and %narrow is always positive. Then
1610 //
1611 // (A) == icmp slt i32 sext(%narrow), sext(%val)
1612 // == icmp slt i32 zext(%narrow), sext(%val)
1613 bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
1614 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1615 return false;
1616
1617 Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1618 unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1619 unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1620 assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1621
1622 // Widen the compare instruction.
1623 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1624
1625 // Widen the other operand of the compare, if necessary.
1626 if (CastWidth < IVWidth) {
1627 Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1628 DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1629 }
1630 return true;
1631}
1632
1633// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
1634// will not work when:
1635// 1) SCEV traces back to an instruction inside the loop that SCEV can not
1636// expand, eg. add %indvar, (load %addr)
1637// 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
1638// While SCEV fails to avoid trunc, we can still try to use instruction
1639// combining approach to prove trunc is not required. This can be further
1640// extended with other instruction combining checks, but for now we handle the
1641// following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext")
1642//
1643// Src:
1644// %c = sub nsw %b, %indvar
1645// %d = sext %c to i64
1646// Dst:
1647// %indvar.ext1 = sext %indvar to i64
1648// %m = sext %b to i64
1649// %d = sub nsw i64 %m, %indvar.ext1
1650// Therefore, as long as the result of add/sub/mul is extended to wide type, no
1651// trunc is required regardless of how %b is generated. This pattern is common
1652// when calculating address in 64 bit architecture
1653bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
1654 Instruction *NarrowUse = DU.NarrowUse;
1655 Instruction *NarrowDef = DU.NarrowDef;
1656 Instruction *WideDef = DU.WideDef;
1657
1658 // Handle the common case of add<nsw/nuw>
1659 const unsigned OpCode = NarrowUse->getOpcode();
1660 // Only Add/Sub/Mul instructions are supported.
1661 if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1662 OpCode != Instruction::Mul)
1663 return false;
1664
1665 // The operand that is not defined by NarrowDef of DU. Let's call it the
1666 // other operand.
1667 assert((NarrowUse->getOperand(0) == NarrowDef ||
1668 NarrowUse->getOperand(1) == NarrowDef) &&
1669 "bad DU");
1670
1671 const OverflowingBinaryOperator *OBO =
1672 cast<OverflowingBinaryOperator>(NarrowUse);
1673 ExtendKind ExtKind = getExtendKind(NarrowDef);
1674 bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap();
1675 bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap();
1676 auto AnotherOpExtKind = ExtKind;
1677
1678 // Check that all uses are either:
1679 // - narrow def (in case of we are widening the IV increment);
1680 // - single-input LCSSA Phis;
1681 // - comparison of the chosen type;
1682 // - extend of the chosen type (raison d'etre).
1684 SmallVector<PHINode *, 4> LCSSAPhiUsers;
1686 for (Use &U : NarrowUse->uses()) {
1687 Instruction *User = cast<Instruction>(U.getUser());
1688 if (User == NarrowDef)
1689 continue;
1690 if (!L->contains(User)) {
1691 auto *LCSSAPhi = cast<PHINode>(User);
1692 // Make sure there is only 1 input, so that we don't have to split
1693 // critical edges.
1694 if (LCSSAPhi->getNumOperands() != 1)
1695 return false;
1696 LCSSAPhiUsers.push_back(LCSSAPhi);
1697 continue;
1698 }
1699 if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
1700 auto Pred = ICmp->getPredicate();
1701 // We have 3 types of predicates: signed, unsigned and equality
1702 // predicates. For equality, it's legal to widen icmp for either sign and
1703 // zero extend. For sign extend, we can also do so for signed predicates,
1704 // likeweise for zero extend we can widen icmp for unsigned predicates.
1705 if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred))
1706 return false;
1707 if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred))
1708 return false;
1709 ICmpUsers.push_back(ICmp);
1710 continue;
1711 }
1712 if (ExtKind == ExtendKind::Sign)
1713 User = dyn_cast<SExtInst>(User);
1714 else
1715 User = dyn_cast<ZExtInst>(User);
1716 if (!User || User->getType() != WideType)
1717 return false;
1718 ExtUsers.push_back(User);
1719 }
1720 if (ExtUsers.empty()) {
1721 DeadInsts.emplace_back(NarrowUse);
1722 return true;
1723 }
1724
1725 // We'll prove some facts that should be true in the context of ext users. If
1726 // there is no users, we are done now. If there are some, pick their common
1727 // dominator as context.
1728 const Instruction *CtxI = findCommonDominator(ExtUsers, *DT);
1729
1730 if (!CanSignExtend && !CanZeroExtend) {
1731 // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
1732 // will most likely not see it. Let's try to prove it.
1733 if (OpCode != Instruction::Add)
1734 return false;
1735 if (ExtKind != ExtendKind::Zero)
1736 return false;
1737 const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
1738 const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
1739 // TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
1740 if (NarrowUse->getOperand(0) != NarrowDef)
1741 return false;
1742 if (!SE->isKnownNegative(RHS))
1743 return false;
1744 bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS,
1745 SE->getNegativeSCEV(RHS), CtxI);
1746 if (!ProvedSubNUW)
1747 return false;
1748 // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
1749 // neg(zext(neg(op))), which is basically sext(op).
1750 AnotherOpExtKind = ExtendKind::Sign;
1751 }
1752
1753 // Verifying that Defining operand is an AddRec
1754 const SCEV *Op1 = SE->getSCEV(WideDef);
1755 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1756 if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1757 return false;
1758
1759 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1760
1761 // Generating a widening use instruction.
1762 Value *LHS =
1763 (NarrowUse->getOperand(0) == NarrowDef)
1764 ? WideDef
1765 : createExtendInst(NarrowUse->getOperand(0), WideType,
1766 AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
1767 Value *RHS =
1768 (NarrowUse->getOperand(1) == NarrowDef)
1769 ? WideDef
1770 : createExtendInst(NarrowUse->getOperand(1), WideType,
1771 AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
1772
1773 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1774 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1775 NarrowBO->getName());
1776 IRBuilder<> Builder(NarrowUse);
1777 Builder.Insert(WideBO);
1778 WideBO->copyIRFlags(NarrowBO);
1779 ExtendKindMap[NarrowUse] = ExtKind;
1780
1781 for (Instruction *User : ExtUsers) {
1782 assert(User->getType() == WideType && "Checked before!");
1783 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1784 << *WideBO << "\n");
1785 ++NumElimExt;
1786 User->replaceAllUsesWith(WideBO);
1787 DeadInsts.emplace_back(User);
1788 }
1789
1790 for (PHINode *User : LCSSAPhiUsers) {
1791 assert(User->getNumOperands() == 1 && "Checked before!");
1792 Builder.SetInsertPoint(User);
1793 auto *WidePN =
1794 Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide");
1795 BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor();
1796 assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
1797 "Not a LCSSA Phi?");
1798 WidePN->addIncoming(WideBO, LoopExitingBlock);
1799 Builder.SetInsertPoint(User->getParent(),
1800 User->getParent()->getFirstInsertionPt());
1801 auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
1802 User->replaceAllUsesWith(TruncPN);
1803 DeadInsts.emplace_back(User);
1804 }
1805
1806 for (ICmpInst *User : ICmpUsers) {
1807 Builder.SetInsertPoint(User);
1808 auto ExtendedOp = [&](Value * V)->Value * {
1809 if (V == NarrowUse)
1810 return WideBO;
1811 if (ExtKind == ExtendKind::Zero)
1812 return Builder.CreateZExt(V, WideBO->getType());
1813 else
1814 return Builder.CreateSExt(V, WideBO->getType());
1815 };
1816 auto Pred = User->getPredicate();
1817 auto *LHS = ExtendedOp(User->getOperand(0));
1818 auto *RHS = ExtendedOp(User->getOperand(1));
1819 auto *WideCmp =
1820 Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
1821 User->replaceAllUsesWith(WideCmp);
1822 DeadInsts.emplace_back(User);
1823 }
1824
1825 return true;
1826}
1827
1828/// Determine whether an individual user of the narrow IV can be widened. If so,
1829/// return the wide clone of the user.
1830Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU,
1831 SCEVExpander &Rewriter, PHINode *OrigPhi,
1832 PHINode *WidePhi) {
1833 assert(ExtendKindMap.count(DU.NarrowDef) &&
1834 "Should already know the kind of extension used to widen NarrowDef");
1835
1836 // This narrow use can be widened by a sext if it's non-negative or its narrow
1837 // def was widened by a sext. Same for zext.
1838 bool CanWidenBySExt =
1839 DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
1840 bool CanWidenByZExt =
1841 DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero;
1842
1843 // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1844 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1845 if (LI->getLoopFor(UsePhi->getParent()) != L) {
1846 // For LCSSA phis, sink the truncate outside the loop.
1847 // After SimplifyCFG most loop exit targets have a single predecessor.
1848 // Otherwise fall back to a truncate within the loop.
1849 if (UsePhi->getNumOperands() != 1)
1850 truncateIVUse(DU);
1851 else {
1852 // Widening the PHI requires us to insert a trunc. The logical place
1853 // for this trunc is in the same BB as the PHI. This is not possible if
1854 // the BB is terminated by a catchswitch.
1855 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1856 return nullptr;
1857
1858 PHINode *WidePhi =
1859 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1860 UsePhi->getIterator());
1861 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1862 BasicBlock *WidePhiBB = WidePhi->getParent();
1863 IRBuilder<> Builder(WidePhiBB, WidePhiBB->getFirstInsertionPt());
1864 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType(), "",
1865 CanWidenByZExt, CanWidenBySExt);
1866 UsePhi->replaceAllUsesWith(Trunc);
1867 DeadInsts.emplace_back(UsePhi);
1868 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1869 << *WidePhi << "\n");
1870 }
1871 return nullptr;
1872 }
1873 }
1874
1875 // Our raison d'etre! Eliminate sign and zero extension.
1876 if ((match(DU.NarrowUse, m_SExtLike(m_Value())) && CanWidenBySExt) ||
1877 (isa<ZExtInst>(DU.NarrowUse) && CanWidenByZExt)) {
1878 Value *NewDef = DU.WideDef;
1879 if (DU.NarrowUse->getType() != WideType) {
1880 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1881 unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1882 if (CastWidth < IVWidth) {
1883 // The cast isn't as wide as the IV, so insert a Trunc.
1884 IRBuilder<> Builder(DU.NarrowUse);
1885 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType(), "",
1886 CanWidenByZExt, CanWidenBySExt);
1887 }
1888 else {
1889 // A wider extend was hidden behind a narrower one. This may induce
1890 // another round of IV widening in which the intermediate IV becomes
1891 // dead. It should be very rare.
1892 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1893 << " not wide enough to subsume " << *DU.NarrowUse
1894 << "\n");
1895 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1896 NewDef = DU.NarrowUse;
1897 }
1898 }
1899 if (NewDef != DU.NarrowUse) {
1900 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1901 << " replaced by " << *DU.WideDef << "\n");
1902 ++NumElimExt;
1903 DU.NarrowUse->replaceAllUsesWith(NewDef);
1904 DeadInsts.emplace_back(DU.NarrowUse);
1905 }
1906 // Now that the extend is gone, we want to expose it's uses for potential
1907 // further simplification. We don't need to directly inform SimplifyIVUsers
1908 // of the new users, because their parent IV will be processed later as a
1909 // new loop phi. If we preserved IVUsers analysis, we would also want to
1910 // push the uses of WideDef here.
1911
1912 // No further widening is needed. The deceased [sz]ext had done it for us.
1913 return nullptr;
1914 }
1915
1916 auto tryAddRecExpansion = [&]() -> Instruction* {
1917 // Does this user itself evaluate to a recurrence after widening?
1918 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1919 if (!WideAddRec.first)
1920 WideAddRec = getWideRecurrence(DU);
1921 assert((WideAddRec.first == nullptr) ==
1922 (WideAddRec.second == ExtendKind::Unknown));
1923 if (!WideAddRec.first)
1924 return nullptr;
1925
1926 // Reuse the IV increment that SCEVExpander created. Recompute flags, unless
1927 // the flags for both increments agree and it is safe to use the ones from
1928 // the original inc. In that case, the new use of the wide increment won't
1929 // be more poisonous.
1930 bool NeedToRecomputeFlags =
1932 DU.NarrowUse, WideInc) ||
1933 DU.NarrowUse->hasNoUnsignedWrap() != WideInc->hasNoUnsignedWrap() ||
1934 DU.NarrowUse->hasNoSignedWrap() != WideInc->hasNoSignedWrap();
1935 Instruction *WideUse = nullptr;
1936 if (WideAddRec.first == WideIncExpr &&
1937 Rewriter.hoistIVInc(WideInc, DU.NarrowUse, NeedToRecomputeFlags))
1938 WideUse = WideInc;
1939 else {
1940 WideUse = cloneIVUser(DU, WideAddRec.first);
1941 if (!WideUse)
1942 return nullptr;
1943 }
1944 // Evaluation of WideAddRec ensured that the narrow expression could be
1945 // extended outside the loop without overflow. This suggests that the wide use
1946 // evaluates to the same expression as the extended narrow use, but doesn't
1947 // absolutely guarantee it. Hence the following failsafe check. In rare cases
1948 // where it fails, we simply throw away the newly created wide use.
1949 if (WideAddRec.first != SE->getSCEV(WideUse)) {
1950 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1951 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1952 << "\n");
1953 DeadInsts.emplace_back(WideUse);
1954 return nullptr;
1955 };
1956
1957 // if we reached this point then we are going to replace
1958 // DU.NarrowUse with WideUse. Reattach DbgValue then.
1959 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1960
1961 ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1962 // Returning WideUse pushes it on the worklist.
1963 return WideUse;
1964 };
1965
1966 if (auto *I = tryAddRecExpansion())
1967 return I;
1968
1969 // If use is a loop condition, try to promote the condition instead of
1970 // truncating the IV first.
1971 if (widenLoopCompare(DU))
1972 return nullptr;
1973
1974 // We are here about to generate a truncate instruction that may hurt
1975 // performance because the scalar evolution expression computed earlier
1976 // in WideAddRec.first does not indicate a polynomial induction expression.
1977 // In that case, look at the operands of the use instruction to determine
1978 // if we can still widen the use instead of truncating its operand.
1979 if (widenWithVariantUse(DU))
1980 return nullptr;
1981
1982 // This user does not evaluate to a recurrence after widening, so don't
1983 // follow it. Instead insert a Trunc to kill off the original use,
1984 // eventually isolating the original narrow IV so it can be removed.
1985 truncateIVUse(DU);
1986 return nullptr;
1987}
1988
1989/// Add eligible users of NarrowDef to NarrowIVUsers.
1990void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1991 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1992 bool NonNegativeDef =
1993 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1994 SE->getZero(NarrowSCEV->getType()));
1995 for (User *U : NarrowDef->users()) {
1996 Instruction *NarrowUser = cast<Instruction>(U);
1997
1998 // Handle data flow merges and bizarre phi cycles.
1999 if (!Widened.insert(NarrowUser).second)
2000 continue;
2001
2002 bool NonNegativeUse = false;
2003 if (!NonNegativeDef) {
2004 // We might have a control-dependent range information for this context.
2005 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
2006 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
2007 }
2008
2009 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
2010 NonNegativeDef || NonNegativeUse);
2011 }
2012}
2013
2014/// Process a single induction variable. First use the SCEVExpander to create a
2015/// wide induction variable that evaluates to the same recurrence as the
2016/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
2017/// def-use chain. After widenIVUse has processed all interesting IV users, the
2018/// narrow IV will be isolated for removal by DeleteDeadPHIs.
2019///
2020/// It would be simpler to delete uses as they are processed, but we must avoid
2021/// invalidating SCEV expressions.
2022PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
2023 // Is this phi an induction variable?
2024 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
2025 if (!AddRec)
2026 return nullptr;
2027
2028 // Widen the induction variable expression.
2029 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign
2030 ? SE->getSignExtendExpr(AddRec, WideType)
2031 : SE->getZeroExtendExpr(AddRec, WideType);
2032
2033 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
2034 "Expect the new IV expression to preserve its type");
2035
2036 // Can the IV be extended outside the loop without overflow?
2037 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
2038 if (!AddRec || AddRec->getLoop() != L)
2039 return nullptr;
2040
2041 // An AddRec must have loop-invariant operands. Since this AddRec is
2042 // materialized by a loop header phi, the expression cannot have any post-loop
2043 // operands, so they must dominate the loop header.
2044 assert(
2045 SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
2046 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
2047 "Loop header phi recurrence inputs do not dominate the loop");
2048
2049 // Iterate over IV uses (including transitive ones) looking for IV increments
2050 // of the form 'add nsw %iv, <const>'. For each increment and each use of
2051 // the increment calculate control-dependent range information basing on
2052 // dominating conditions inside of the loop (e.g. a range check inside of the
2053 // loop). Calculated ranges are stored in PostIncRangeInfos map.
2054 //
2055 // Control-dependent range information is later used to prove that a narrow
2056 // definition is not negative (see pushNarrowIVUsers). It's difficult to do
2057 // this on demand because when pushNarrowIVUsers needs this information some
2058 // of the dominating conditions might be already widened.
2060 calculatePostIncRanges(OrigPhi);
2061
2062 // The rewriter provides a value for the desired IV expression. This may
2063 // either find an existing phi or materialize a new one. Either way, we
2064 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
2065 // of the phi-SCC dominates the loop entry.
2066 Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
2067 Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
2068 // If the wide phi is not a phi node, for example a cast node, like bitcast,
2069 // inttoptr, ptrtoint, just skip for now.
2070 if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
2071 // if the cast node is an inserted instruction without any user, we should
2072 // remove it to make sure the pass don't touch the function as we can not
2073 // wide the phi.
2074 if (ExpandInst->hasNUses(0) &&
2075 Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
2076 DeadInsts.emplace_back(ExpandInst);
2077 return nullptr;
2078 }
2079
2080 // Remembering the WideIV increment generated by SCEVExpander allows
2081 // widenIVUse to reuse it when widening the narrow IV's increment. We don't
2082 // employ a general reuse mechanism because the call above is the only call to
2083 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
2084 if (BasicBlock *LatchBlock = L->getLoopLatch()) {
2085 WideInc =
2086 dyn_cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
2087 if (WideInc) {
2088 WideIncExpr = SE->getSCEV(WideInc);
2089 // Propagate the debug location associated with the original loop
2090 // increment to the new (widened) increment.
2091 auto *OrigInc =
2092 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
2093
2094 WideInc->setDebugLoc(OrigInc->getDebugLoc());
2095 // We are replacing a narrow IV increment with a wider IV increment. If
2096 // the original (narrow) increment did not wrap, the wider increment one
2097 // should not wrap either. Set the flags to be the union of both wide
2098 // increment and original increment; this ensures we preserve flags SCEV
2099 // could infer for the wider increment. Limit this only to cases where
2100 // both increments directly increment the corresponding PHI nodes and have
2101 // the same opcode. It is not safe to re-use the flags from the original
2102 // increment, if it is more complex and SCEV expansion may have yielded a
2103 // more simplified wider increment.
2105 OrigInc, WideInc) &&
2106 isa<OverflowingBinaryOperator>(OrigInc) &&
2107 isa<OverflowingBinaryOperator>(WideInc)) {
2108 WideInc->setHasNoUnsignedWrap(WideInc->hasNoUnsignedWrap() ||
2109 OrigInc->hasNoUnsignedWrap());
2110 WideInc->setHasNoSignedWrap(WideInc->hasNoSignedWrap() ||
2111 OrigInc->hasNoSignedWrap());
2112 }
2113 }
2114 }
2115
2116 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
2117 ++NumWidened;
2118
2119 // Traverse the def-use chain using a worklist starting at the original IV.
2120 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
2121
2122 Widened.insert(OrigPhi);
2123 pushNarrowIVUsers(OrigPhi, WidePhi);
2124
2125 while (!NarrowIVUsers.empty()) {
2126 WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
2127
2128 // Process a def-use edge. This may replace the use, so don't hold a
2129 // use_iterator across it.
2130 Instruction *WideUse = widenIVUse(DU, Rewriter, OrigPhi, WidePhi);
2131
2132 // Follow all def-use edges from the previous narrow use.
2133 if (WideUse)
2134 pushNarrowIVUsers(DU.NarrowUse, WideUse);
2135
2136 // widenIVUse may have removed the def-use edge.
2137 if (DU.NarrowDef->use_empty())
2138 DeadInsts.emplace_back(DU.NarrowDef);
2139 }
2140
2141 // Attach any debug information to the new PHI.
2142 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
2143
2144 return WidePhi;
2145}
2146
2147/// Calculates control-dependent range for the given def at the given context
2148/// by looking at dominating conditions inside of the loop
2149void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
2150 Instruction *NarrowUser) {
2151 Value *NarrowDefLHS;
2152 const APInt *NarrowDefRHS;
2153 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
2154 m_APInt(NarrowDefRHS))) ||
2155 !NarrowDefRHS->isNonNegative())
2156 return;
2157
2158 auto UpdateRangeFromCondition = [&] (Value *Condition,
2159 bool TrueDest) {
2160 CmpInst::Predicate Pred;
2161 Value *CmpRHS;
2162 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
2163 m_Value(CmpRHS))))
2164 return;
2165
2167 TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
2168
2169 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
2170 auto CmpConstrainedLHSRange =
2172 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
2174
2175 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
2176 };
2177
2178 auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
2179 if (!HasGuards)
2180 return;
2181
2182 for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
2183 Ctx->getParent()->rend())) {
2184 Value *C = nullptr;
2185 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
2186 UpdateRangeFromCondition(C, /*TrueDest=*/true);
2187 }
2188 };
2189
2190 UpdateRangeFromGuards(NarrowUser);
2191
2192 BasicBlock *NarrowUserBB = NarrowUser->getParent();
2193 // If NarrowUserBB is statically unreachable asking dominator queries may
2194 // yield surprising results. (e.g. the block may not have a dom tree node)
2195 if (!DT->isReachableFromEntry(NarrowUserBB))
2196 return;
2197
2198 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
2199 L->contains(DTB->getBlock());
2200 DTB = DTB->getIDom()) {
2201 auto *BB = DTB->getBlock();
2202 auto *TI = BB->getTerminator();
2203 UpdateRangeFromGuards(TI);
2204
2205 auto *BI = dyn_cast<BranchInst>(TI);
2206 if (!BI || !BI->isConditional())
2207 continue;
2208
2209 auto *TrueSuccessor = BI->getSuccessor(0);
2210 auto *FalseSuccessor = BI->getSuccessor(1);
2211
2212 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
2213 return BBE.isSingleEdge() &&
2214 DT->dominates(BBE, NarrowUser->getParent());
2215 };
2216
2217 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
2218 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
2219
2220 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
2221 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
2222 }
2223}
2224
2225/// Calculates PostIncRangeInfos map for the given IV
2226void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
2229 Worklist.push_back(OrigPhi);
2230 Visited.insert(OrigPhi);
2231
2232 while (!Worklist.empty()) {
2233 Instruction *NarrowDef = Worklist.pop_back_val();
2234
2235 for (Use &U : NarrowDef->uses()) {
2236 auto *NarrowUser = cast<Instruction>(U.getUser());
2237
2238 // Don't go looking outside the current loop.
2239 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
2240 if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
2241 continue;
2242
2243 if (!Visited.insert(NarrowUser).second)
2244 continue;
2245
2246 Worklist.push_back(NarrowUser);
2247
2248 calculatePostIncRange(NarrowDef, NarrowUser);
2249 }
2250 }
2251}
2252
2254 LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter,
2256 unsigned &NumElimExt, unsigned &NumWidened,
2257 bool HasGuards, bool UsePostIncrementRanges) {
2258 WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges);
2259 PHINode *WidePHI = Widener.createWideIV(Rewriter);
2260 NumElimExt = Widener.getNumElimExt();
2261 NumWidened = Widener.getNumWidened();
2262 return WidePHI;
2263}
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
Rewrite undef for PHI
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define DEBUG_TYPE
#define _
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
static cl::opt< bool > WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true), cl::desc("Widen the loop induction variables, if possible, so " "overflow checks won't reject flattening"))
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Instruction * GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint)
static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE)
Return true if this instruction generates a simple SCEV expression in terms of that IV.
static Instruction * findCommonDominator(ArrayRef< Instruction * > Instructions, DominatorTree &DT)
Find a point in code which dominates all given instructions.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static std::optional< BinaryOp > matchBinaryOp(Instruction *Op)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
Class for arbitrary precision integers.
Definition: APInt.h:77
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:313
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:218
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1200
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:264
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:414
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:167
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
Value * getRHS() const
bool isSigned() const
Whether the intrinsic is signed or unsigned.
Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
Value * getLHS() const
BinaryOps getOpcode() const
Definition: InstrTypes.h:442
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:850
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
bool isSigned() const
Definition: InstrTypes.h:1007
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
bool isUnsigned() const
Definition: InstrTypes.h:1013
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:864
This class represents a range of values.
Definition: ConstantRange.h:47
APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:344
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2663
Interface for visiting interesting IV users that are recognized but not simplified by this utility.
virtual void anchor()
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:473
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition: Operator.h:77
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition: Operator.h:110
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition: Operator.h:104
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1814
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class uses information about analyze scalars to rewrite expressions in canonical form.
static bool canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi, PHINode *WidePhi, Instruction *OrigInc, Instruction *WideInc)
Return true if both increments directly increment the corresponding IV PHI nodes and have the same op...
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
Represents a saturating add/sub intrinsic.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS, and RHS.
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the given type, for which isSCEVable must return true.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask)
Convenient NoWrapFlags manipulation that hides enum casts and is visible in the ScalarEvolution name space.
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that make up the given SCEV properly dominate the specified basic block.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication based on element size.
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Represents an op.with.overflow intrinsic.
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Key
PAL metadata keys.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:875
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead)
SimplifyLoopIVs - Simplify users of induction variables within this loop.
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2717
std::pair< bool, bool > simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:77
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Collect information about induction variables that are used by sign/zero extend operations.