LLVM 19.0.0git
ScalarEvolutionExpander.cpp
Go to the documentation of this file.
1//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the implementation of the scalar evolution expander,
10// which is used to generate the code corresponding to a given scalar evolution
11// expression.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
18#include "llvm/ADT/SmallSet.h"
23#include "llvm/IR/DataLayout.h"
24#include "llvm/IR/Dominators.h"
30
31#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
32#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
33#else
34#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
35#endif
36
37using namespace llvm;
38
40 "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
41 cl::desc("When performing SCEV expansion only if it is cheap to do, this "
42 "controls the budget that is considered cheap (default = 4)"));
43
44using namespace PatternMatch;
45
47 NUW = false;
48 NSW = false;
49 Exact = false;
50 Disjoint = false;
51 NNeg = false;
52 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
53 NUW = OBO->hasNoUnsignedWrap();
54 NSW = OBO->hasNoSignedWrap();
55 }
56 if (auto *PEO = dyn_cast<PossiblyExactOperator>(I))
57 Exact = PEO->isExact();
58 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
59 Disjoint = PDI->isDisjoint();
60 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
61 NNeg = PNI->hasNonNeg();
62 if (auto *TI = dyn_cast<TruncInst>(I)) {
63 NUW = TI->hasNoUnsignedWrap();
64 NSW = TI->hasNoSignedWrap();
65 }
66}
67
69 if (isa<OverflowingBinaryOperator>(I)) {
70 I->setHasNoUnsignedWrap(NUW);
71 I->setHasNoSignedWrap(NSW);
72 }
73 if (isa<PossiblyExactOperator>(I))
74 I->setIsExact(Exact);
75 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
76 PDI->setIsDisjoint(Disjoint);
77 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
78 PNI->setNonNeg(NNeg);
79 if (isa<TruncInst>(I)) {
80 I->setHasNoUnsignedWrap(NUW);
81 I->setHasNoSignedWrap(NSW);
82 }
83}
84
85/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
86/// reusing an existing cast if a suitable one (= dominating IP) exists, or
87/// creating a new one.
88Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
91 // This function must be called with the builder having a valid insertion
92 // point. It doesn't need to be the actual IP where the uses of the returned
93 // cast will be added, but it must dominate such IP.
94 // We use this precondition to produce a cast that will dominate all its
95 // uses. In particular, this is crucial for the case where the builder's
96 // insertion point *is* the point where we were asked to put the cast.
97 // Since we don't know the builder's insertion point is actually
98 // where the uses will be added (only that it dominates it), we are
99 // not allowed to move it.
100 BasicBlock::iterator BIP = Builder.GetInsertPoint();
101
102 Value *Ret = nullptr;
103
104 // Check to see if there is already a cast!
105 for (User *U : V->users()) {
106 if (U->getType() != Ty)
107 continue;
108 CastInst *CI = dyn_cast<CastInst>(U);
109 if (!CI || CI->getOpcode() != Op)
110 continue;
111
112 // Found a suitable cast that is at IP or comes before IP. Use it. Note that
113 // the cast must also properly dominate the Builder's insertion point.
114 if (IP->getParent() == CI->getParent() && &*BIP != CI &&
115 (&*IP == CI || CI->comesBefore(&*IP))) {
116 Ret = CI;
117 break;
118 }
119 }
120
121 // Create a new cast.
122 if (!Ret) {
123 SCEVInsertPointGuard Guard(Builder, this);
124 Builder.SetInsertPoint(&*IP);
125 Ret = Builder.CreateCast(Op, V, Ty, V->getName());
126 }
127
128 // We assert at the end of the function since IP might point to an
129 // instruction with different dominance properties than a cast
130 // (an invoke for example) and not dominate BIP (but the cast does).
131 assert(!isa<Instruction>(Ret) ||
132 SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
133
134 return Ret;
135}
136
139 Instruction *MustDominate) const {
140 BasicBlock::iterator IP = ++I->getIterator();
141 if (auto *II = dyn_cast<InvokeInst>(I))
142 IP = II->getNormalDest()->begin();
143
144 while (isa<PHINode>(IP))
145 ++IP;
146
147 if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
148 ++IP;
149 } else if (isa<CatchSwitchInst>(IP)) {
150 IP = MustDominate->getParent()->getFirstInsertionPt();
151 } else {
152 assert(!IP->isEHPad() && "unexpected eh pad!");
153 }
154
155 // Adjust insert point to be after instructions inserted by the expander, so
156 // we can re-use already inserted instructions. Avoid skipping past the
157 // original \p MustDominate, in case it is an inserted instruction.
158 while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
159 ++IP;
160
161 return IP;
162}
163
165SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
166 // Cast the argument at the beginning of the entry block, after
167 // any bitcasts of other arguments.
168 if (Argument *A = dyn_cast<Argument>(V)) {
169 BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
170 while ((isa<BitCastInst>(IP) &&
171 isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
172 cast<BitCastInst>(IP)->getOperand(0) != A) ||
173 isa<DbgInfoIntrinsic>(IP))
174 ++IP;
175 return IP;
176 }
177
178 // Cast the instruction immediately after the instruction.
179 if (Instruction *I = dyn_cast<Instruction>(V))
180 return findInsertPointAfter(I, &*Builder.GetInsertPoint());
181
182 // Otherwise, this must be some kind of a constant,
183 // so let's plop this cast into the function's entry block.
184 assert(isa<Constant>(V) &&
185 "Expected the cast argument to be a global/constant");
186 return Builder.GetInsertBlock()
187 ->getParent()
188 ->getEntryBlock()
190}
191
192/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
193/// which must be possible with a noop cast, doing what we can to share
194/// the casts.
195Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
196 Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
197 assert((Op == Instruction::BitCast ||
198 Op == Instruction::PtrToInt ||
199 Op == Instruction::IntToPtr) &&
200 "InsertNoopCastOfTo cannot perform non-noop casts!");
201 assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
202 "InsertNoopCastOfTo cannot change sizes!");
203
204 // inttoptr only works for integral pointers. For non-integral pointers, we
205 // can create a GEP on null with the integral value as index. Note that
206 // it is safe to use GEP of null instead of inttoptr here, because only
207 // expressions already based on a GEP of null should be converted to pointers
208 // during expansion.
209 if (Op == Instruction::IntToPtr) {
210 auto *PtrTy = cast<PointerType>(Ty);
211 if (DL.isNonIntegralPointerType(PtrTy))
212 return Builder.CreatePtrAdd(Constant::getNullValue(PtrTy), V, "scevgep");
213 }
214 // Short-circuit unnecessary bitcasts.
215 if (Op == Instruction::BitCast) {
216 if (V->getType() == Ty)
217 return V;
218 if (CastInst *CI = dyn_cast<CastInst>(V)) {
219 if (CI->getOperand(0)->getType() == Ty)
220 return CI->getOperand(0);
221 }
222 }
223 // Short-circuit unnecessary inttoptr<->ptrtoint casts.
224 if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
225 SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
226 if (CastInst *CI = dyn_cast<CastInst>(V))
227 if ((CI->getOpcode() == Instruction::PtrToInt ||
228 CI->getOpcode() == Instruction::IntToPtr) &&
229 SE.getTypeSizeInBits(CI->getType()) ==
231 return CI->getOperand(0);
232 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
233 if ((CE->getOpcode() == Instruction::PtrToInt ||
234 CE->getOpcode() == Instruction::IntToPtr) &&
235 SE.getTypeSizeInBits(CE->getType()) ==
236 SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
237 return CE->getOperand(0);
238 }
239
240 // Fold a cast of a constant.
241 if (Constant *C = dyn_cast<Constant>(V))
242 return ConstantExpr::getCast(Op, C, Ty);
243
244 // Try to reuse existing cast, or insert one.
245 return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
246}
247
248/// InsertBinop - Insert the specified binary operator, doing a small amount
249/// of work to avoid inserting an obviously redundant operation, and hoisting
250/// to an outer loop when the opportunity is there and it is safe.
251Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
252 Value *LHS, Value *RHS,
253 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
254 // Fold a binop with constant operands.
255 if (Constant *CLHS = dyn_cast<Constant>(LHS))
256 if (Constant *CRHS = dyn_cast<Constant>(RHS))
257 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
258 return Res;
259
260 // Do a quick scan to see if we have this binop nearby. If so, reuse it.
261 unsigned ScanLimit = 6;
262 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
263 // Scanning starts from the last instruction before the insertion point.
265 if (IP != BlockBegin) {
266 --IP;
267 for (; ScanLimit; --IP, --ScanLimit) {
268 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
269 // generated code.
270 if (isa<DbgInfoIntrinsic>(IP))
271 ScanLimit++;
272
273 auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
274 // Ensure that no-wrap flags match.
275 if (isa<OverflowingBinaryOperator>(I)) {
276 if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
277 return true;
278 if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
279 return true;
280 }
281 // Conservatively, do not use any instruction which has any of exact
282 // flags installed.
283 if (isa<PossiblyExactOperator>(I) && I->isExact())
284 return true;
285 return false;
286 };
287 if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
288 IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
289 return &*IP;
290 if (IP == BlockBegin) break;
291 }
292 }
293
294 // Save the original insertion point so we can restore it when we're done.
295 DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
296 SCEVInsertPointGuard Guard(Builder, this);
297
298 if (IsSafeToHoist) {
299 // Move the insertion point out of as many loops as we can.
300 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
301 if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
302 BasicBlock *Preheader = L->getLoopPreheader();
303 if (!Preheader) break;
304
305 // Ok, move up a level.
306 Builder.SetInsertPoint(Preheader->getTerminator());
307 }
308 }
309
310 // If we haven't found this binop, insert it.
311 // TODO: Use the Builder, which will make CreateBinOp below fold with
312 // InstSimplifyFolder.
313 Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
314 BO->setDebugLoc(Loc);
315 if (Flags & SCEV::FlagNUW)
317 if (Flags & SCEV::FlagNSW)
318 BO->setHasNoSignedWrap();
319
320 return BO;
321}
322
323/// expandAddToGEP - Expand an addition expression with a pointer type into
324/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
325/// BasicAliasAnalysis and other passes analyze the result. See the rules
326/// for getelementptr vs. inttoptr in
327/// http://llvm.org/docs/LangRef.html#pointeraliasing
328/// for details.
329///
330/// Design note: The correctness of using getelementptr here depends on
331/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
332/// they may introduce pointer arithmetic which may not be safely converted
333/// into getelementptr.
334///
335/// Design note: It might seem desirable for this function to be more
336/// loop-aware. If some of the indices are loop-invariant while others
337/// aren't, it might seem desirable to emit multiple GEPs, keeping the
338/// loop-invariant portions of the overall computation outside the loop.
339/// However, there are a few reasons this is not done here. Hoisting simple
340/// arithmetic is a low-level optimization that often isn't very
341/// important until late in the optimization process. In fact, passes
342/// like InstructionCombining will combine GEPs, even if it means
343/// pushing loop-invariant computation down into loops, so even if the
344/// GEPs were split here, the work would quickly be undone. The
345/// LoopStrengthReduction pass, which is usually run quite late (and
346/// after the last InstructionCombining pass), takes care of hoisting
347/// loop-invariant portions of expressions, after considering what
348/// can be folded using target addressing modes.
349///
350Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
351 assert(!isa<Instruction>(V) ||
352 SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
353
354 Value *Idx = expand(Offset);
355
356 // Fold a GEP with constant operands.
357 if (Constant *CLHS = dyn_cast<Constant>(V))
358 if (Constant *CRHS = dyn_cast<Constant>(Idx))
359 return Builder.CreatePtrAdd(CLHS, CRHS);
360
361 // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
362 unsigned ScanLimit = 6;
363 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
364 // Scanning starts from the last instruction before the insertion point.
366 if (IP != BlockBegin) {
367 --IP;
368 for (; ScanLimit; --IP, --ScanLimit) {
369 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
370 // generated code.
371 if (isa<DbgInfoIntrinsic>(IP))
372 ScanLimit++;
373 if (IP->getOpcode() == Instruction::GetElementPtr &&
374 IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
375 cast<GEPOperator>(&*IP)->getSourceElementType() ==
376 Builder.getInt8Ty())
377 return &*IP;
378 if (IP == BlockBegin) break;
379 }
380 }
381
382 // Save the original insertion point so we can restore it when we're done.
383 SCEVInsertPointGuard Guard(Builder, this);
384
385 // Move the insertion point out of as many loops as we can.
386 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
387 if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
388 BasicBlock *Preheader = L->getLoopPreheader();
389 if (!Preheader) break;
390
391 // Ok, move up a level.
392 Builder.SetInsertPoint(Preheader->getTerminator());
393 }
394
395 // Emit a GEP.
396 return Builder.CreatePtrAdd(V, Idx, "scevgep");
397}
398
399/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
400/// SCEV expansion. If they are nested, this is the most nested. If they are
401/// neighboring, pick the later.
402static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
403 DominatorTree &DT) {
404 if (!A) return B;
405 if (!B) return A;
406 if (A->contains(B)) return B;
407 if (B->contains(A)) return A;
408 if (DT.dominates(A->getHeader(), B->getHeader())) return B;
409 if (DT.dominates(B->getHeader(), A->getHeader())) return A;
410 return A; // Arbitrarily break the tie.
411}
412
413/// getRelevantLoop - Get the most relevant loop associated with the given
414/// expression, according to PickMostRelevantLoop.
415const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
416 // Test whether we've already computed the most relevant loop for this SCEV.
417 auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
418 if (!Pair.second)
419 return Pair.first->second;
420
421 switch (S->getSCEVType()) {
422 case scConstant:
423 case scVScale:
424 return nullptr; // A constant has no relevant loops.
425 case scTruncate:
426 case scZeroExtend:
427 case scSignExtend:
428 case scPtrToInt:
429 case scAddExpr:
430 case scMulExpr:
431 case scUDivExpr:
432 case scAddRecExpr:
433 case scUMaxExpr:
434 case scSMaxExpr:
435 case scUMinExpr:
436 case scSMinExpr:
438 const Loop *L = nullptr;
439 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
440 L = AR->getLoop();
441 for (const SCEV *Op : S->operands())
442 L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
443 return RelevantLoops[S] = L;
444 }
445 case scUnknown: {
446 const SCEVUnknown *U = cast<SCEVUnknown>(S);
447 if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
448 return Pair.first->second = SE.LI.getLoopFor(I->getParent());
449 // A non-instruction has no relevant loops.
450 return nullptr;
451 }
453 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
454 }
455 llvm_unreachable("Unexpected SCEV type!");
456}
457
458namespace {
459
460/// LoopCompare - Compare loops by PickMostRelevantLoop.
461class LoopCompare {
462 DominatorTree &DT;
463public:
464 explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
465
466 bool operator()(std::pair<const Loop *, const SCEV *> LHS,
467 std::pair<const Loop *, const SCEV *> RHS) const {
468 // Keep pointer operands sorted at the end.
469 if (LHS.second->getType()->isPointerTy() !=
470 RHS.second->getType()->isPointerTy())
471 return LHS.second->getType()->isPointerTy();
472
473 // Compare loops with PickMostRelevantLoop.
474 if (LHS.first != RHS.first)
475 return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
476
477 // If one operand is a non-constant negative and the other is not,
478 // put the non-constant negative on the right so that a sub can
479 // be used instead of a negate and add.
480 if (LHS.second->isNonConstantNegative()) {
481 if (!RHS.second->isNonConstantNegative())
482 return false;
483 } else if (RHS.second->isNonConstantNegative())
484 return true;
485
486 // Otherwise they are equivalent according to this comparison.
487 return false;
488 }
489};
490
491}
492
493Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
494 // Recognize the canonical representation of an unsimplifed urem.
495 const SCEV *URemLHS = nullptr;
496 const SCEV *URemRHS = nullptr;
497 if (SE.matchURem(S, URemLHS, URemRHS)) {
498 Value *LHS = expand(URemLHS);
499 Value *RHS = expand(URemRHS);
500 return InsertBinop(Instruction::URem, LHS, RHS, SCEV::FlagAnyWrap,
501 /*IsSafeToHoist*/ false);
502 }
503
504 // Collect all the add operands in a loop, along with their associated loops.
505 // Iterate in reverse so that constants are emitted last, all else equal, and
506 // so that pointer operands are inserted first, which the code below relies on
507 // to form more involved GEPs.
509 for (const SCEV *Op : reverse(S->operands()))
510 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
511
512 // Sort by loop. Use a stable sort so that constants follow non-constants and
513 // pointer operands precede non-pointer operands.
514 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
515
516 // Emit instructions to add all the operands. Hoist as much as possible
517 // out of loops, and form meaningful getelementptrs where possible.
518 Value *Sum = nullptr;
519 for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
520 const Loop *CurLoop = I->first;
521 const SCEV *Op = I->second;
522 if (!Sum) {
523 // This is the first operand. Just expand it.
524 Sum = expand(Op);
525 ++I;
526 continue;
527 }
528
529 assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
530 if (isa<PointerType>(Sum->getType())) {
531 // The running sum expression is a pointer. Try to form a getelementptr
532 // at this level with that as the base.
534 for (; I != E && I->first == CurLoop; ++I) {
535 // If the operand is SCEVUnknown and not instructions, peek through
536 // it, to enable more of it to be folded into the GEP.
537 const SCEV *X = I->second;
538 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
539 if (!isa<Instruction>(U->getValue()))
540 X = SE.getSCEV(U->getValue());
541 NewOps.push_back(X);
542 }
543 Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum);
544 } else if (Op->isNonConstantNegative()) {
545 // Instead of doing a negate and add, just do a subtract.
546 Value *W = expand(SE.getNegativeSCEV(Op));
547 Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
548 /*IsSafeToHoist*/ true);
549 ++I;
550 } else {
551 // A simple add.
552 Value *W = expand(Op);
553 // Canonicalize a constant to the RHS.
554 if (isa<Constant>(Sum))
555 std::swap(Sum, W);
556 Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
557 /*IsSafeToHoist*/ true);
558 ++I;
559 }
560 }
561
562 return Sum;
563}
564
565Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
566 Type *Ty = S->getType();
567
568 // Collect all the mul operands in a loop, along with their associated loops.
569 // Iterate in reverse so that constants are emitted last, all else equal.
571 for (const SCEV *Op : reverse(S->operands()))
572 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
573
574 // Sort by loop. Use a stable sort so that constants follow non-constants.
575 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
576
577 // Emit instructions to mul all the operands. Hoist as much as possible
578 // out of loops.
579 Value *Prod = nullptr;
580 auto I = OpsAndLoops.begin();
581
582 // Expand the calculation of X pow N in the following manner:
583 // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
584 // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
585 const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
586 auto E = I;
587 // Calculate how many times the same operand from the same loop is included
588 // into this power.
589 uint64_t Exponent = 0;
590 const uint64_t MaxExponent = UINT64_MAX >> 1;
591 // No one sane will ever try to calculate such huge exponents, but if we
592 // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
593 // below when the power of 2 exceeds our Exponent, and we want it to be
594 // 1u << 31 at most to not deal with unsigned overflow.
595 while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
596 ++Exponent;
597 ++E;
598 }
599 assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
600
601 // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
602 // that are needed into the result.
603 Value *P = expand(I->second);
604 Value *Result = nullptr;
605 if (Exponent & 1)
606 Result = P;
607 for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
608 P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
609 /*IsSafeToHoist*/ true);
610 if (Exponent & BinExp)
611 Result = Result ? InsertBinop(Instruction::Mul, Result, P,
613 /*IsSafeToHoist*/ true)
614 : P;
615 }
616
617 I = E;
618 assert(Result && "Nothing was expanded?");
619 return Result;
620 };
621
622 while (I != OpsAndLoops.end()) {
623 if (!Prod) {
624 // This is the first operand. Just expand it.
625 Prod = ExpandOpBinPowN();
626 } else if (I->second->isAllOnesValue()) {
627 // Instead of doing a multiply by negative one, just do a negate.
628 Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
629 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
630 ++I;
631 } else {
632 // A simple mul.
633 Value *W = ExpandOpBinPowN();
634 // Canonicalize a constant to the RHS.
635 if (isa<Constant>(Prod)) std::swap(Prod, W);
636 const APInt *RHS;
637 if (match(W, m_Power2(RHS))) {
638 // Canonicalize Prod*(1<<C) to Prod<<C.
639 assert(!Ty->isVectorTy() && "vector types are not SCEVable");
640 auto NWFlags = S->getNoWrapFlags();
641 // clear nsw flag if shl will produce poison value.
642 if (RHS->logBase2() == RHS->getBitWidth() - 1)
643 NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
644 Prod = InsertBinop(Instruction::Shl, Prod,
645 ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
646 /*IsSafeToHoist*/ true);
647 } else {
648 Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
649 /*IsSafeToHoist*/ true);
650 }
651 }
652 }
653
654 return Prod;
655}
656
657Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
658 Value *LHS = expand(S->getLHS());
659 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
660 const APInt &RHS = SC->getAPInt();
661 if (RHS.isPowerOf2())
662 return InsertBinop(Instruction::LShr, LHS,
663 ConstantInt::get(SC->getType(), RHS.logBase2()),
664 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
665 }
666
667 Value *RHS = expand(S->getRHS());
668 return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
669 /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
670}
671
672/// Determine if this is a well-behaved chain of instructions leading back to
673/// the PHI. If so, it may be reused by expanded expressions.
674bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
675 const Loop *L) {
676 if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
677 (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
678 return false;
679 // If any of the operands don't dominate the insert position, bail.
680 // Addrec operands are always loop-invariant, so this can only happen
681 // if there are instructions which haven't been hoisted.
682 if (L == IVIncInsertLoop) {
683 for (Use &Op : llvm::drop_begin(IncV->operands()))
684 if (Instruction *OInst = dyn_cast<Instruction>(Op))
685 if (!SE.DT.dominates(OInst, IVIncInsertPos))
686 return false;
687 }
688 // Advance to the next instruction.
689 IncV = dyn_cast<Instruction>(IncV->getOperand(0));
690 if (!IncV)
691 return false;
692
693 if (IncV->mayHaveSideEffects())
694 return false;
695
696 if (IncV == PN)
697 return true;
698
699 return isNormalAddRecExprPHI(PN, IncV, L);
700}
701
702/// getIVIncOperand returns an induction variable increment's induction
703/// variable operand.
704///
705/// If allowScale is set, any type of GEP is allowed as long as the nonIV
706/// operands dominate InsertPos.
707///
708/// If allowScale is not set, ensure that a GEP increment conforms to one of the
709/// simple patterns generated by getAddRecExprPHILiterally and
710/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
712 Instruction *InsertPos,
713 bool allowScale) {
714 if (IncV == InsertPos)
715 return nullptr;
716
717 switch (IncV->getOpcode()) {
718 default:
719 return nullptr;
720 // Check for a simple Add/Sub or GEP of a loop invariant step.
721 case Instruction::Add:
722 case Instruction::Sub: {
723 Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
724 if (!OInst || SE.DT.dominates(OInst, InsertPos))
725 return dyn_cast<Instruction>(IncV->getOperand(0));
726 return nullptr;
727 }
728 case Instruction::BitCast:
729 return dyn_cast<Instruction>(IncV->getOperand(0));
730 case Instruction::GetElementPtr:
731 for (Use &U : llvm::drop_begin(IncV->operands())) {
732 if (isa<Constant>(U))
733 continue;
734 if (Instruction *OInst = dyn_cast<Instruction>(U)) {
735 if (!SE.DT.dominates(OInst, InsertPos))
736 return nullptr;
737 }
738 if (allowScale) {
739 // allow any kind of GEP as long as it can be hoisted.
740 continue;
741 }
742 // GEPs produced by SCEVExpander use i8 element type.
743 if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
744 return nullptr;
745 break;
746 }
747 return dyn_cast<Instruction>(IncV->getOperand(0));
748 }
749}
750
751/// If the insert point of the current builder or any of the builders on the
752/// stack of saved builders has 'I' as its insert point, update it to point to
753/// the instruction after 'I'. This is intended to be used when the instruction
754/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
755/// different block, the inconsistent insert point (with a mismatched
756/// Instruction and Block) can lead to an instruction being inserted in a block
757/// other than its parent.
758void SCEVExpander::fixupInsertPoints(Instruction *I) {
760 BasicBlock::iterator NewInsertPt = std::next(It);
761 if (Builder.GetInsertPoint() == It)
762 Builder.SetInsertPoint(&*NewInsertPt);
763 for (auto *InsertPtGuard : InsertPointGuards)
764 if (InsertPtGuard->GetInsertPoint() == It)
765 InsertPtGuard->SetInsertPoint(NewInsertPt);
766}
767
768/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
769/// it available to other uses in this loop. Recursively hoist any operands,
770/// until we reach a value that dominates InsertPos.
772 bool RecomputePoisonFlags) {
773 auto FixupPoisonFlags = [this](Instruction *I) {
774 // Drop flags that are potentially inferred from old context and infer flags
775 // in new context.
776 rememberFlags(I);
777 I->dropPoisonGeneratingFlags();
778 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
779 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
780 auto *BO = cast<BinaryOperator>(I);
785 }
786 };
787
788 if (SE.DT.dominates(IncV, InsertPos)) {
789 if (RecomputePoisonFlags)
790 FixupPoisonFlags(IncV);
791 return true;
792 }
793
794 // InsertPos must itself dominate IncV so that IncV's new position satisfies
795 // its existing users.
796 if (isa<PHINode>(InsertPos) ||
797 !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
798 return false;
799
800 if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
801 return false;
802
803 // Check that the chain of IV operands leading back to Phi can be hoisted.
805 for(;;) {
806 Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
807 if (!Oper)
808 return false;
809 // IncV is safe to hoist.
810 IVIncs.push_back(IncV);
811 IncV = Oper;
812 if (SE.DT.dominates(IncV, InsertPos))
813 break;
814 }
815 for (Instruction *I : llvm::reverse(IVIncs)) {
816 fixupInsertPoints(I);
817 I->moveBefore(InsertPos);
818 if (RecomputePoisonFlags)
819 FixupPoisonFlags(I);
820 }
821 return true;
822}
823
825 PHINode *WidePhi,
826 Instruction *OrigInc,
827 Instruction *WideInc) {
828 return match(OrigInc, m_c_BinOp(m_Specific(OrigPhi), m_Value())) &&
829 match(WideInc, m_c_BinOp(m_Specific(WidePhi), m_Value())) &&
830 OrigInc->getOpcode() == WideInc->getOpcode();
831}
832
833/// Determine if this cyclic phi is in a form that would have been generated by
834/// LSR. We don't care if the phi was actually expanded in this pass, as long
835/// as it is in a low-cost form, for example, no implied multiplication. This
836/// should match any patterns generated by getAddRecExprPHILiterally and
837/// expandAddtoGEP.
838bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
839 const Loop *L) {
840 for(Instruction *IVOper = IncV;
841 (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
842 /*allowScale=*/false));) {
843 if (IVOper == PN)
844 return true;
845 }
846 return false;
847}
848
849/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
850/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
851/// need to materialize IV increments elsewhere to handle difficult situations.
852Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
853 bool useSubtract) {
854 Value *IncV;
855 // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
856 if (PN->getType()->isPointerTy()) {
857 // TODO: Change name to IVName.iv.next.
858 IncV = Builder.CreatePtrAdd(PN, StepV, "scevgep");
859 } else {
860 IncV = useSubtract ?
861 Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
862 Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
863 }
864 return IncV;
865}
866
867/// Check whether we can cheaply express the requested SCEV in terms of
868/// the available PHI SCEV by truncation and/or inversion of the step.
870 const SCEVAddRecExpr *Phi,
871 const SCEVAddRecExpr *Requested,
872 bool &InvertStep) {
873 // We can't transform to match a pointer PHI.
874 Type *PhiTy = Phi->getType();
875 Type *RequestedTy = Requested->getType();
876 if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
877 return false;
878
879 if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
880 return false;
881
882 // Try truncate it if necessary.
883 Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
884 if (!Phi)
885 return false;
886
887 // Check whether truncation will help.
888 if (Phi == Requested) {
889 InvertStep = false;
890 return true;
891 }
892
893 // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
894 if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
895 InvertStep = true;
896 return true;
897 }
898
899 return false;
900}
901
902static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
903 if (!isa<IntegerType>(AR->getType()))
904 return false;
905
906 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
907 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
908 const SCEV *Step = AR->getStepRecurrence(SE);
909 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
910 SE.getSignExtendExpr(AR, WideTy));
911 const SCEV *ExtendAfterOp =
912 SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
913 return ExtendAfterOp == OpAfterExtend;
914}
915
916static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
917 if (!isa<IntegerType>(AR->getType()))
918 return false;
919
920 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
921 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
922 const SCEV *Step = AR->getStepRecurrence(SE);
923 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
924 SE.getZeroExtendExpr(AR, WideTy));
925 const SCEV *ExtendAfterOp =
926 SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
927 return ExtendAfterOp == OpAfterExtend;
928}
929
930/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
931/// the base addrec, which is the addrec without any non-loop-dominating
932/// values, and return the PHI.
933PHINode *
934SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
935 const Loop *L, Type *&TruncTy,
936 bool &InvertStep) {
937 assert((!IVIncInsertLoop || IVIncInsertPos) &&
938 "Uninitialized insert position");
939
940 // Reuse a previously-inserted PHI, if present.
941 BasicBlock *LatchBlock = L->getLoopLatch();
942 if (LatchBlock) {
943 PHINode *AddRecPhiMatch = nullptr;
944 Instruction *IncV = nullptr;
945 TruncTy = nullptr;
946 InvertStep = false;
947
948 // Only try partially matching scevs that need truncation and/or
949 // step-inversion if we know this loop is outside the current loop.
950 bool TryNonMatchingSCEV =
951 IVIncInsertLoop &&
952 SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
953
954 for (PHINode &PN : L->getHeader()->phis()) {
955 if (!SE.isSCEVable(PN.getType()))
956 continue;
957
958 // We should not look for a incomplete PHI. Getting SCEV for a incomplete
959 // PHI has no meaning at all.
960 if (!PN.isComplete()) {
962 DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
963 continue;
964 }
965
966 const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
967 if (!PhiSCEV)
968 continue;
969
970 bool IsMatchingSCEV = PhiSCEV == Normalized;
971 // We only handle truncation and inversion of phi recurrences for the
972 // expanded expression if the expanded expression's loop dominates the
973 // loop we insert to. Check now, so we can bail out early.
974 if (!IsMatchingSCEV && !TryNonMatchingSCEV)
975 continue;
976
977 // TODO: this possibly can be reworked to avoid this cast at all.
978 Instruction *TempIncV =
979 dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
980 if (!TempIncV)
981 continue;
982
983 // Check whether we can reuse this PHI node.
984 if (LSRMode) {
985 if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
986 continue;
987 } else {
988 if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
989 continue;
990 }
991
992 // Stop if we have found an exact match SCEV.
993 if (IsMatchingSCEV) {
994 IncV = TempIncV;
995 TruncTy = nullptr;
996 InvertStep = false;
997 AddRecPhiMatch = &PN;
998 break;
999 }
1000
1001 // Try whether the phi can be translated into the requested form
1002 // (truncated and/or offset by a constant).
1003 if ((!TruncTy || InvertStep) &&
1004 canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
1005 // Record the phi node. But don't stop we might find an exact match
1006 // later.
1007 AddRecPhiMatch = &PN;
1008 IncV = TempIncV;
1009 TruncTy = Normalized->getType();
1010 }
1011 }
1012
1013 if (AddRecPhiMatch) {
1014 // Ok, the add recurrence looks usable.
1015 // Remember this PHI, even in post-inc mode.
1016 InsertedValues.insert(AddRecPhiMatch);
1017 // Remember the increment.
1018 rememberInstruction(IncV);
1019 // Those values were not actually inserted but re-used.
1020 ReusedValues.insert(AddRecPhiMatch);
1021 ReusedValues.insert(IncV);
1022 return AddRecPhiMatch;
1023 }
1024 }
1025
1026 // Save the original insertion point so we can restore it when we're done.
1027 SCEVInsertPointGuard Guard(Builder, this);
1028
1029 // Another AddRec may need to be recursively expanded below. For example, if
1030 // this AddRec is quadratic, the StepV may itself be an AddRec in this
1031 // loop. Remove this loop from the PostIncLoops set before expanding such
1032 // AddRecs. Otherwise, we cannot find a valid position for the step
1033 // (i.e. StepV can never dominate its loop header). Ideally, we could do
1034 // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
1035 // so it's not worth implementing SmallPtrSet::swap.
1036 PostIncLoopSet SavedPostIncLoops = PostIncLoops;
1037 PostIncLoops.clear();
1038
1039 // Expand code for the start value into the loop preheader.
1040 assert(L->getLoopPreheader() &&
1041 "Can't expand add recurrences without a loop preheader!");
1042 Value *StartV =
1043 expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());
1044
1045 // StartV must have been be inserted into L's preheader to dominate the new
1046 // phi.
1047 assert(!isa<Instruction>(StartV) ||
1048 SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
1049 L->getHeader()));
1050
1051 // Expand code for the step value. Do this before creating the PHI so that PHI
1052 // reuse code doesn't see an incomplete PHI.
1053 const SCEV *Step = Normalized->getStepRecurrence(SE);
1054 Type *ExpandTy = Normalized->getType();
1055 // If the stride is negative, insert a sub instead of an add for the increment
1056 // (unless it's a constant, because subtracts of constants are canonicalized
1057 // to adds).
1058 bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
1059 if (useSubtract)
1060 Step = SE.getNegativeSCEV(Step);
1061 // Expand the step somewhere that dominates the loop header.
1062 Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1063
1064 // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
1065 // we actually do emit an addition. It does not apply if we emit a
1066 // subtraction.
1067 bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
1068 bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
1069
1070 // Create the PHI.
1071 BasicBlock *Header = L->getHeader();
1072 Builder.SetInsertPoint(Header, Header->begin());
1073 PHINode *PN =
1074 Builder.CreatePHI(ExpandTy, pred_size(Header), Twine(IVName) + ".iv");
1075
1076 // Create the step instructions and populate the PHI.
1077 for (BasicBlock *Pred : predecessors(Header)) {
1078 // Add a start value.
1079 if (!L->contains(Pred)) {
1080 PN->addIncoming(StartV, Pred);
1081 continue;
1082 }
1083
1084 // Create a step value and add it to the PHI.
1085 // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1086 // instructions at IVIncInsertPos.
1087 Instruction *InsertPos = L == IVIncInsertLoop ?
1088 IVIncInsertPos : Pred->getTerminator();
1089 Builder.SetInsertPoint(InsertPos);
1090 Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
1091
1092 if (isa<OverflowingBinaryOperator>(IncV)) {
1093 if (IncrementIsNUW)
1094 cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
1095 if (IncrementIsNSW)
1096 cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
1097 }
1098 PN->addIncoming(IncV, Pred);
1099 }
1100
1101 // After expanding subexpressions, restore the PostIncLoops set so the caller
1102 // can ensure that IVIncrement dominates the current uses.
1103 PostIncLoops = SavedPostIncLoops;
1104
1105 // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
1106 // effective when we are able to use an IV inserted here, so record it.
1107 InsertedValues.insert(PN);
1108 InsertedIVs.push_back(PN);
1109 return PN;
1110}
1111
1112Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
1113 const Loop *L = S->getLoop();
1114
1115 // Determine a normalized form of this expression, which is the expression
1116 // before any post-inc adjustment is made.
1117 const SCEVAddRecExpr *Normalized = S;
1118 if (PostIncLoops.count(L)) {
1120 Loops.insert(L);
1121 Normalized = cast<SCEVAddRecExpr>(
1122 normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
1123 }
1124
1125 [[maybe_unused]] const SCEV *Start = Normalized->getStart();
1126 const SCEV *Step = Normalized->getStepRecurrence(SE);
1127 assert(SE.properlyDominates(Start, L->getHeader()) &&
1128 "Start does not properly dominate loop header");
1129 assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header");
1130
1131 // In some cases, we decide to reuse an existing phi node but need to truncate
1132 // it and/or invert the step.
1133 Type *TruncTy = nullptr;
1134 bool InvertStep = false;
1135 PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
1136
1137 // Accommodate post-inc mode, if necessary.
1138 Value *Result;
1139 if (!PostIncLoops.count(L))
1140 Result = PN;
1141 else {
1142 // In PostInc mode, use the post-incremented value.
1143 BasicBlock *LatchBlock = L->getLoopLatch();
1144 assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1145 Result = PN->getIncomingValueForBlock(LatchBlock);
1146
1147 // We might be introducing a new use of the post-inc IV that is not poison
1148 // safe, in which case we should drop poison generating flags. Only keep
1149 // those flags for which SCEV has proven that they always hold.
1150 if (isa<OverflowingBinaryOperator>(Result)) {
1151 auto *I = cast<Instruction>(Result);
1152 if (!S->hasNoUnsignedWrap())
1153 I->setHasNoUnsignedWrap(false);
1154 if (!S->hasNoSignedWrap())
1155 I->setHasNoSignedWrap(false);
1156 }
1157
1158 // For an expansion to use the postinc form, the client must call
1159 // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1160 // or dominated by IVIncInsertPos.
1161 if (isa<Instruction>(Result) &&
1162 !SE.DT.dominates(cast<Instruction>(Result),
1163 &*Builder.GetInsertPoint())) {
1164 // The induction variable's postinc expansion does not dominate this use.
1165 // IVUsers tries to prevent this case, so it is rare. However, it can
1166 // happen when an IVUser outside the loop is not dominated by the latch
1167 // block. Adjusting IVIncInsertPos before expansion begins cannot handle
1168 // all cases. Consider a phi outside whose operand is replaced during
1169 // expansion with the value of the postinc user. Without fundamentally
1170 // changing the way postinc users are tracked, the only remedy is
1171 // inserting an extra IV increment. StepV might fold into PostLoopOffset,
1172 // but hopefully expandCodeFor handles that.
1173 bool useSubtract =
1174 !S->getType()->isPointerTy() && Step->isNonConstantNegative();
1175 if (useSubtract)
1176 Step = SE.getNegativeSCEV(Step);
1177 Value *StepV;
1178 {
1179 // Expand the step somewhere that dominates the loop header.
1180 SCEVInsertPointGuard Guard(Builder, this);
1181 StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1182 }
1183 Result = expandIVInc(PN, StepV, L, useSubtract);
1184 }
1185 }
1186
1187 // We have decided to reuse an induction variable of a dominating loop. Apply
1188 // truncation and/or inversion of the step.
1189 if (TruncTy) {
1190 // Truncate the result.
1191 if (TruncTy != Result->getType())
1192 Result = Builder.CreateTrunc(Result, TruncTy);
1193
1194 // Invert the result.
1195 if (InvertStep)
1196 Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
1197 }
1198
1199 return Result;
1200}
1201
1202Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
1203 // In canonical mode we compute the addrec as an expression of a canonical IV
1204 // using evaluateAtIteration and expand the resulting SCEV expression. This
1205 // way we avoid introducing new IVs to carry on the computation of the addrec
1206 // throughout the loop.
1207 //
1208 // For nested addrecs evaluateAtIteration might need a canonical IV of a
1209 // type wider than the addrec itself. Emitting a canonical IV of the
1210 // proper type might produce non-legal types, for example expanding an i64
1211 // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1212 // back to non-canonical mode for nested addrecs.
1213 if (!CanonicalMode || (S->getNumOperands() > 2))
1214 return expandAddRecExprLiterally(S);
1215
1216 Type *Ty = SE.getEffectiveSCEVType(S->getType());
1217 const Loop *L = S->getLoop();
1218
1219 // First check for an existing canonical IV in a suitable type.
1220 PHINode *CanonicalIV = nullptr;
1221 if (PHINode *PN = L->getCanonicalInductionVariable())
1222 if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
1223 CanonicalIV = PN;
1224
1225 // Rewrite an AddRec in terms of the canonical induction variable, if
1226 // its type is more narrow.
1227 if (CanonicalIV &&
1228 SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
1229 !S->getType()->isPointerTy()) {
1231 for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
1232 NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType());
1233 Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
1235 BasicBlock::iterator NewInsertPt =
1236 findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
1237 V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
1238 return V;
1239 }
1240
1241 // {X,+,F} --> X + {0,+,F}
1242 if (!S->getStart()->isZero()) {
1243 if (isa<PointerType>(S->getType())) {
1244 Value *StartV = expand(SE.getPointerBase(S));
1245 return expandAddToGEP(SE.removePointerBase(S), StartV);
1246 }
1247
1249 NewOps[0] = SE.getConstant(Ty, 0);
1250 const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
1252
1253 // Just do a normal add. Pre-expand the operands to suppress folding.
1254 //
1255 // The LHS and RHS values are factored out of the expand call to make the
1256 // output independent of the argument evaluation order.
1257 const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
1258 const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
1259 return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
1260 }
1261
1262 // If we don't yet have a canonical IV, create one.
1263 if (!CanonicalIV) {
1264 // Create and insert the PHI node for the induction variable in the
1265 // specified loop.
1266 BasicBlock *Header = L->getHeader();
1267 pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
1268 CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
1269 CanonicalIV->insertBefore(Header->begin());
1270 rememberInstruction(CanonicalIV);
1271
1273 Constant *One = ConstantInt::get(Ty, 1);
1274 for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1275 BasicBlock *HP = *HPI;
1276 if (!PredSeen.insert(HP).second) {
1277 // There must be an incoming value for each predecessor, even the
1278 // duplicates!
1279 CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
1280 continue;
1281 }
1282
1283 if (L->contains(HP)) {
1284 // Insert a unit add instruction right before the terminator
1285 // corresponding to the back-edge.
1286 Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
1287 "indvar.next",
1288 HP->getTerminator()->getIterator());
1289 Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
1290 rememberInstruction(Add);
1291 CanonicalIV->addIncoming(Add, HP);
1292 } else {
1293 CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
1294 }
1295 }
1296 }
1297
1298 // {0,+,1} --> Insert a canonical induction variable into the loop!
1299 if (S->isAffine() && S->getOperand(1)->isOne()) {
1300 assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1301 "IVs with types different from the canonical IV should "
1302 "already have been handled!");
1303 return CanonicalIV;
1304 }
1305
1306 // {0,+,F} --> {0,+,1} * F
1307
1308 // If this is a simple linear addrec, emit it now as a special case.
1309 if (S->isAffine()) // {0,+,F} --> i*F
1310 return
1311 expand(SE.getTruncateOrNoop(
1312 SE.getMulExpr(SE.getUnknown(CanonicalIV),
1314 CanonicalIV->getType())),
1315 Ty));
1316
1317 // If this is a chain of recurrences, turn it into a closed form, using the
1318 // folders, then expandCodeFor the closed form. This allows the folders to
1319 // simplify the expression without having to build a bunch of special code
1320 // into this folder.
1321 const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
1322
1323 // Promote S up to the canonical IV type, if the cast is foldable.
1324 const SCEV *NewS = S;
1325 const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
1326 if (isa<SCEVAddRecExpr>(Ext))
1327 NewS = Ext;
1328
1329 const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
1330
1331 // Truncate the result down to the original type, if needed.
1332 const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1333 return expand(T);
1334}
1335
1336Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
1337 Value *V = expand(S->getOperand());
1338 return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
1339 GetOptimalInsertionPointForCastOf(V));
1340}
1341
1342Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
1343 Value *V = expand(S->getOperand());
1344 return Builder.CreateTrunc(V, S->getType());
1345}
1346
1347Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
1348 Value *V = expand(S->getOperand());
1349 return Builder.CreateZExt(V, S->getType(), "",
1351}
1352
1353Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
1354 Value *V = expand(S->getOperand());
1355 return Builder.CreateSExt(V, S->getType());
1356}
1357
1358Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
1359 Intrinsic::ID IntrinID, Twine Name,
1360 bool IsSequential) {
1361 Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
1362 Type *Ty = LHS->getType();
1363 if (IsSequential)
1364 LHS = Builder.CreateFreeze(LHS);
1365 for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1366 Value *RHS = expand(S->getOperand(i));
1367 if (IsSequential && i != 0)
1368 RHS = Builder.CreateFreeze(RHS);
1369 Value *Sel;
1370 if (Ty->isIntegerTy())
1371 Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
1372 /*FMFSource=*/nullptr, Name);
1373 else {
1374 Value *ICmp =
1375 Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS);
1376 Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name);
1377 }
1378 LHS = Sel;
1379 }
1380 return LHS;
1381}
1382
1383Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1384 return expandMinMaxExpr(S, Intrinsic::smax, "smax");
1385}
1386
1387Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1388 return expandMinMaxExpr(S, Intrinsic::umax, "umax");
1389}
1390
1391Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
1392 return expandMinMaxExpr(S, Intrinsic::smin, "smin");
1393}
1394
1395Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
1396 return expandMinMaxExpr(S, Intrinsic::umin, "umin");
1397}
1398
1399Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
1400 return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
1401}
1402
1403Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
1404 return Builder.CreateVScale(ConstantInt::get(S->getType(), 1));
1405}
1406
1409 setInsertPoint(IP);
1410 Value *V = expandCodeFor(SH, Ty);
1411 return V;
1412}
1413
1415 // Expand the code for this SCEV.
1416 Value *V = expand(SH);
1417
1418 if (Ty) {
1419 assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1420 "non-trivial casts should be done with the SCEVs directly!");
1421 V = InsertNoopCastOfTo(V, Ty);
1422 }
1423 return V;
1424}
1425
1426Value *SCEVExpander::FindValueInExprValueMap(
1427 const SCEV *S, const Instruction *InsertPt,
1428 SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
1429 // If the expansion is not in CanonicalMode, and the SCEV contains any
1430 // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
1431 if (!CanonicalMode && SE.containsAddRecurrence(S))
1432 return nullptr;
1433
1434 // If S is a constant, it may be worse to reuse an existing Value.
1435 if (isa<SCEVConstant>(S))
1436 return nullptr;
1437
1438 for (Value *V : SE.getSCEVValues(S)) {
1439 Instruction *EntInst = dyn_cast<Instruction>(V);
1440 if (!EntInst)
1441 continue;
1442
1443 // Choose a Value from the set which dominates the InsertPt.
1444 // InsertPt should be inside the Value's parent loop so as not to break
1445 // the LCSSA form.
1446 assert(EntInst->getFunction() == InsertPt->getFunction());
1447 if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) ||
1448 !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
1449 SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
1450 continue;
1451
1452 // Make sure reusing the instruction is poison-safe.
1453 if (SE.canReuseInstruction(S, EntInst, DropPoisonGeneratingInsts))
1454 return V;
1455 DropPoisonGeneratingInsts.clear();
1456 }
1457 return nullptr;
1458}
1459
1460// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1461// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1462// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1463// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1464// the expansion will try to reuse Value from ExprValueMap, and only when it
1465// fails, expand the SCEV literally.
1466Value *SCEVExpander::expand(const SCEV *S) {
1467 // Compute an insertion point for this SCEV object. Hoist the instructions
1468 // as far out in the loop nest as possible.
1469 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1470
1471 // We can move insertion point only if there is no div or rem operations
1472 // otherwise we are risky to move it over the check for zero denominator.
1473 auto SafeToHoist = [](const SCEV *S) {
1474 return !SCEVExprContains(S, [](const SCEV *S) {
1475 if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1476 if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1477 // Division by non-zero constants can be hoisted.
1478 return SC->getValue()->isZero();
1479 // All other divisions should not be moved as they may be
1480 // divisions by zero and should be kept within the
1481 // conditions of the surrounding loops that guard their
1482 // execution (see PR35406).
1483 return true;
1484 }
1485 return false;
1486 });
1487 };
1488 if (SafeToHoist(S)) {
1489 for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1490 L = L->getParentLoop()) {
1491 if (SE.isLoopInvariant(S, L)) {
1492 if (!L) break;
1493 if (BasicBlock *Preheader = L->getLoopPreheader()) {
1494 InsertPt = Preheader->getTerminator()->getIterator();
1495 } else {
1496 // LSR sets the insertion point for AddRec start/step values to the
1497 // block start to simplify value reuse, even though it's an invalid
1498 // position. SCEVExpander must correct for this in all cases.
1499 InsertPt = L->getHeader()->getFirstInsertionPt();
1500 }
1501 } else {
1502 // If the SCEV is computable at this level, insert it into the header
1503 // after the PHIs (and after any other instructions that we've inserted
1504 // there) so that it is guaranteed to dominate any user inside the loop.
1505 if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
1506 InsertPt = L->getHeader()->getFirstInsertionPt();
1507
1508 while (InsertPt != Builder.GetInsertPoint() &&
1509 (isInsertedInstruction(&*InsertPt) ||
1510 isa<DbgInfoIntrinsic>(&*InsertPt))) {
1511 InsertPt = std::next(InsertPt);
1512 }
1513 break;
1514 }
1515 }
1516 }
1517
1518 // Check to see if we already expanded this here.
1519 auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt));
1520 if (I != InsertedExpressions.end())
1521 return I->second;
1522
1523 SCEVInsertPointGuard Guard(Builder, this);
1524 Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1525
1526 // Expand the expression into instructions.
1527 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1528 Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts);
1529 if (!V) {
1530 V = visit(S);
1531 V = fixupLCSSAFormFor(V);
1532 } else {
1533 for (Instruction *I : DropPoisonGeneratingInsts) {
1534 rememberFlags(I);
1535 I->dropPoisonGeneratingAnnotations();
1536 // See if we can re-infer from first principles any of the flags we just
1537 // dropped.
1538 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
1539 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1540 auto *BO = cast<BinaryOperator>(I);
1545 }
1546 if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
1547 auto *Src = NNI->getOperand(0);
1549 Constant::getNullValue(Src->getType()), I,
1550 DL).value_or(false))
1551 NNI->setNonNeg(true);
1552 }
1553 }
1554 }
1555 // Remember the expanded value for this SCEV at this location.
1556 //
1557 // This is independent of PostIncLoops. The mapped value simply materializes
1558 // the expression at this insertion point. If the mapped value happened to be
1559 // a postinc expansion, it could be reused by a non-postinc user, but only if
1560 // its insertion point was already at the head of the loop.
1561 InsertedExpressions[std::make_pair(S, &*InsertPt)] = V;
1562 return V;
1563}
1564
1565void SCEVExpander::rememberInstruction(Value *I) {
1566 auto DoInsert = [this](Value *V) {
1567 if (!PostIncLoops.empty())
1568 InsertedPostIncValues.insert(V);
1569 else
1570 InsertedValues.insert(V);
1571 };
1572 DoInsert(I);
1573}
1574
1575void SCEVExpander::rememberFlags(Instruction *I) {
1576 // If we already have flags for the instruction, keep the existing ones.
1577 OrigFlags.try_emplace(I, PoisonFlags(I));
1578}
1579
1580void SCEVExpander::replaceCongruentIVInc(
1581 PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
1583 BasicBlock *LatchBlock = L->getLoopLatch();
1584 if (!LatchBlock)
1585 return;
1586
1587 Instruction *OrigInc =
1588 dyn_cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1589 Instruction *IsomorphicInc =
1590 dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
1591 if (!OrigInc || !IsomorphicInc)
1592 return;
1593
1594 // If this phi has the same width but is more canonical, replace the
1595 // original with it. As part of the "more canonical" determination,
1596 // respect a prior decision to use an IV chain.
1597 if (OrigPhi->getType() == Phi->getType() &&
1598 !(ChainedPhis.count(Phi) ||
1599 isExpandedAddRecExprPHI(OrigPhi, OrigInc, L)) &&
1600 (ChainedPhis.count(Phi) ||
1601 isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
1602 std::swap(OrigPhi, Phi);
1603 std::swap(OrigInc, IsomorphicInc);
1604 }
1605
1606 // Replacing the congruent phi is sufficient because acyclic
1607 // redundancy elimination, CSE/GVN, should handle the
1608 // rest. However, once SCEV proves that a phi is congruent,
1609 // it's often the head of an IV user cycle that is isomorphic
1610 // with the original phi. It's worth eagerly cleaning up the
1611 // common case of a single IV increment so that DeleteDeadPHIs
1612 // can remove cycles that had postinc uses.
1613 // Because we may potentially introduce a new use of OrigIV that didn't
1614 // exist before at this point, its poison flags need readjustment.
1615 const SCEV *TruncExpr =
1616 SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
1617 if (OrigInc == IsomorphicInc || TruncExpr != SE.getSCEV(IsomorphicInc) ||
1618 !SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc))
1619 return;
1620
1621 bool BothHaveNUW = false;
1622 bool BothHaveNSW = false;
1623 auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(OrigInc);
1624 auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(IsomorphicInc);
1625 if (OBOIncV && OBOIsomorphic) {
1626 BothHaveNUW =
1627 OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
1628 BothHaveNSW =
1629 OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
1630 }
1631
1632 if (!hoistIVInc(OrigInc, IsomorphicInc,
1633 /*RecomputePoisonFlags*/ true))
1634 return;
1635
1636 // We are replacing with a wider increment. If both OrigInc and IsomorphicInc
1637 // are NUW/NSW, then we can preserve them on the wider increment; the narrower
1638 // IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
1639 // make IsomorphicInc's uses more poisonous.
1640 assert(OrigInc->getType()->getScalarSizeInBits() >=
1641 IsomorphicInc->getType()->getScalarSizeInBits() &&
1642 "Should only replace an increment with a wider one.");
1643 if (BothHaveNUW || BothHaveNSW) {
1644 OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() || BothHaveNUW);
1645 OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() || BothHaveNSW);
1646 }
1647
1648 SCEV_DEBUG_WITH_TYPE(DebugType,
1649 dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1650 << *IsomorphicInc << '\n');
1651 Value *NewInc = OrigInc;
1652 if (OrigInc->getType() != IsomorphicInc->getType()) {
1654 if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
1655 IP = PN->getParent()->getFirstInsertionPt();
1656 else
1657 IP = OrigInc->getNextNonDebugInstruction()->getIterator();
1658
1659 IRBuilder<> Builder(IP->getParent(), IP);
1660 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1661 NewInc =
1662 Builder.CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
1663 }
1664 IsomorphicInc->replaceAllUsesWith(NewInc);
1665 DeadInsts.emplace_back(IsomorphicInc);
1666}
1667
1668/// replaceCongruentIVs - Check for congruent phis in this loop header and
1669/// replace them with their most canonical representative. Return the number of
1670/// phis eliminated.
1671///
1672/// This does not depend on any SCEVExpander state but should be used in
1673/// the same context that SCEVExpander is used.
1674unsigned
1677 const TargetTransformInfo *TTI) {
1678 // Find integer phis in order of increasing width.
1680 for (PHINode &PN : L->getHeader()->phis())
1681 Phis.push_back(&PN);
1682
1683 if (TTI)
1684 // Use stable_sort to preserve order of equivalent PHIs, so the order
1685 // of the sorted Phis is the same from run to run on the same loop.
1686 llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
1687 // Put pointers at the back and make sure pointer < pointer = false.
1688 if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1689 return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1692 });
1693
1694 unsigned NumElim = 0;
1696 // Process phis from wide to narrow. Map wide phis to their truncation
1697 // so narrow phis can reuse them.
1698 for (PHINode *Phi : Phis) {
1699 auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1700 if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
1701 return V;
1702 if (!SE.isSCEVable(PN->getType()))
1703 return nullptr;
1704 auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
1705 if (!Const)
1706 return nullptr;
1707 return Const->getValue();
1708 };
1709
1710 // Fold constant phis. They may be congruent to other constant phis and
1711 // would confuse the logic below that expects proper IVs.
1712 if (Value *V = SimplifyPHINode(Phi)) {
1713 if (V->getType() != Phi->getType())
1714 continue;
1715 SE.forgetValue(Phi);
1716 Phi->replaceAllUsesWith(V);
1717 DeadInsts.emplace_back(Phi);
1718 ++NumElim;
1719 SCEV_DEBUG_WITH_TYPE(DebugType,
1720 dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1721 << '\n');
1722 continue;
1723 }
1724
1725 if (!SE.isSCEVable(Phi->getType()))
1726 continue;
1727
1728 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
1729 if (!OrigPhiRef) {
1730 OrigPhiRef = Phi;
1731 if (Phi->getType()->isIntegerTy() && TTI &&
1732 TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
1733 // Make sure we only rewrite using simple induction variables;
1734 // otherwise, we can make the trip count of a loop unanalyzable
1735 // to SCEV.
1736 const SCEV *PhiExpr = SE.getSCEV(Phi);
1737 if (isa<SCEVAddRecExpr>(PhiExpr)) {
1738 // This phi can be freely truncated to the narrowest phi type. Map the
1739 // truncated expression to it so it will be reused for narrow types.
1740 const SCEV *TruncExpr =
1741 SE.getTruncateExpr(PhiExpr, Phis.back()->getType());
1742 ExprToIVMap[TruncExpr] = Phi;
1743 }
1744 }
1745 continue;
1746 }
1747
1748 // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1749 // sense.
1750 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1751 continue;
1752
1753 replaceCongruentIVInc(Phi, OrigPhiRef, L, DT, DeadInsts);
1754 SCEV_DEBUG_WITH_TYPE(DebugType,
1755 dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1756 << '\n');
1758 DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
1759 ++NumElim;
1760 Value *NewIV = OrigPhiRef;
1761 if (OrigPhiRef->getType() != Phi->getType()) {
1762 IRBuilder<> Builder(L->getHeader(),
1763 L->getHeader()->getFirstInsertionPt());
1764 Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1765 NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
1766 }
1767 Phi->replaceAllUsesWith(NewIV);
1768 DeadInsts.emplace_back(Phi);
1769 }
1770 return NumElim;
1771}
1772
1774 const Instruction *At,
1775 Loop *L) {
1776 using namespace llvm::PatternMatch;
1777
1778 SmallVector<BasicBlock *, 4> ExitingBlocks;
1779 L->getExitingBlocks(ExitingBlocks);
1780
1781 // Look for suitable value in simple conditions at the loop exits.
1782 for (BasicBlock *BB : ExitingBlocks) {
1784 Instruction *LHS, *RHS;
1785
1786 if (!match(BB->getTerminator(),
1789 continue;
1790
1791 if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
1792 return true;
1793
1794 if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
1795 return true;
1796 }
1797
1798 // Use expand's logic which is used for reusing a previous Value in
1799 // ExprValueMap. Note that we don't currently model the cost of
1800 // needing to drop poison generating flags on the instruction if we
1801 // want to reuse it. We effectively assume that has zero cost.
1802 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1803 return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
1804}
1805
1806template<typename T> static InstructionCost costAndCollectOperands(
1809 SmallVectorImpl<SCEVOperand> &Worklist) {
1810
1811 const T *S = cast<T>(WorkItem.S);
1813 // Object to help map SCEV operands to expanded IR instructions.
1814 struct OperationIndices {
1815 OperationIndices(unsigned Opc, size_t min, size_t max) :
1816 Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1817 unsigned Opcode;
1818 size_t MinIdx;
1819 size_t MaxIdx;
1820 };
1821
1822 // Collect the operations of all the instructions that will be needed to
1823 // expand the SCEVExpr. This is so that when we come to cost the operands,
1824 // we know what the generated user(s) will be.
1826
1827 auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1828 Operations.emplace_back(Opcode, 0, 0);
1829 return TTI.getCastInstrCost(Opcode, S->getType(),
1830 S->getOperand(0)->getType(),
1832 };
1833
1834 auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1835 unsigned MinIdx = 0,
1836 unsigned MaxIdx = 1) -> InstructionCost {
1837 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1838 return NumRequired *
1839 TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
1840 };
1841
1842 auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1843 unsigned MaxIdx) -> InstructionCost {
1844 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1845 Type *OpType = S->getType();
1846 return NumRequired * TTI.getCmpSelInstrCost(
1847 Opcode, OpType, CmpInst::makeCmpResultType(OpType),
1849 };
1850
1851 switch (S->getSCEVType()) {
1852 case scCouldNotCompute:
1853 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1854 case scUnknown:
1855 case scConstant:
1856 case scVScale:
1857 return 0;
1858 case scPtrToInt:
1859 Cost = CastCost(Instruction::PtrToInt);
1860 break;
1861 case scTruncate:
1862 Cost = CastCost(Instruction::Trunc);
1863 break;
1864 case scZeroExtend:
1865 Cost = CastCost(Instruction::ZExt);
1866 break;
1867 case scSignExtend:
1868 Cost = CastCost(Instruction::SExt);
1869 break;
1870 case scUDivExpr: {
1871 unsigned Opcode = Instruction::UDiv;
1872 if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1873 if (SC->getAPInt().isPowerOf2())
1874 Opcode = Instruction::LShr;
1875 Cost = ArithCost(Opcode, 1);
1876 break;
1877 }
1878 case scAddExpr:
1879 Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
1880 break;
1881 case scMulExpr:
1882 // TODO: this is a very pessimistic cost modelling for Mul,
1883 // because of Bin Pow algorithm actually used by the expander,
1884 // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
1885 Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
1886 break;
1887 case scSMaxExpr:
1888 case scUMaxExpr:
1889 case scSMinExpr:
1890 case scUMinExpr:
1891 case scSequentialUMinExpr: {
1892 // FIXME: should this ask the cost for Intrinsic's?
1893 // The reduction tree.
1894 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
1895 Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
1896 switch (S->getSCEVType()) {
1897 case scSequentialUMinExpr: {
1898 // The safety net against poison.
1899 // FIXME: this is broken.
1900 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
1901 Cost += ArithCost(Instruction::Or,
1902 S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
1903 Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
1904 break;
1905 }
1906 default:
1907 assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
1908 "Unhandled SCEV expression type?");
1909 break;
1910 }
1911 break;
1912 }
1913 case scAddRecExpr: {
1914 // In this polynominal, we may have some zero operands, and we shouldn't
1915 // really charge for those. So how many non-zero coefficients are there?
1916 int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
1917 return !Op->isZero();
1918 });
1919
1920 assert(NumTerms >= 1 && "Polynominal should have at least one term.");
1921 assert(!(*std::prev(S->operands().end()))->isZero() &&
1922 "Last operand should not be zero");
1923
1924 // Ignoring constant term (operand 0), how many of the coefficients are u> 1?
1925 int NumNonZeroDegreeNonOneTerms =
1926 llvm::count_if(S->operands(), [](const SCEV *Op) {
1927 auto *SConst = dyn_cast<SCEVConstant>(Op);
1928 return !SConst || SConst->getAPInt().ugt(1);
1929 });
1930
1931 // Much like with normal add expr, the polynominal will require
1932 // one less addition than the number of it's terms.
1933 InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
1934 /*MinIdx*/ 1, /*MaxIdx*/ 1);
1935 // Here, *each* one of those will require a multiplication.
1936 InstructionCost MulCost =
1937 ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
1938 Cost = AddCost + MulCost;
1939
1940 // What is the degree of this polynominal?
1941 int PolyDegree = S->getNumOperands() - 1;
1942 assert(PolyDegree >= 1 && "Should be at least affine.");
1943
1944 // The final term will be:
1945 // Op_{PolyDegree} * x ^ {PolyDegree}
1946 // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
1947 // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
1948 // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
1949 // FIXME: this is conservatively correct, but might be overly pessimistic.
1950 Cost += MulCost * (PolyDegree - 1);
1951 break;
1952 }
1953 }
1954
1955 for (auto &CostOp : Operations) {
1956 for (auto SCEVOp : enumerate(S->operands())) {
1957 // Clamp the index to account for multiple IR operations being chained.
1958 size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
1959 size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
1960 Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
1961 }
1962 }
1963 return Cost;
1964}
1965
1966bool SCEVExpander::isHighCostExpansionHelper(
1967 const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
1968 InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
1970 SmallVectorImpl<SCEVOperand> &Worklist) {
1971 if (Cost > Budget)
1972 return true; // Already run out of budget, give up.
1973
1974 const SCEV *S = WorkItem.S;
1975 // Was the cost of expansion of this expression already accounted for?
1976 if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
1977 return false; // We have already accounted for this expression.
1978
1979 // If we can find an existing value for this scev available at the point "At"
1980 // then consider the expression cheap.
1981 if (hasRelatedExistingExpansion(S, &At, L))
1982 return false; // Consider the expression to be free.
1983
1985 L->getHeader()->getParent()->hasMinSize()
1988
1989 switch (S->getSCEVType()) {
1990 case scCouldNotCompute:
1991 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1992 case scUnknown:
1993 case scVScale:
1994 // Assume to be zero-cost.
1995 return false;
1996 case scConstant: {
1997 // Only evalulate the costs of constants when optimizing for size.
1999 return false;
2000 const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
2001 Type *Ty = S->getType();
2003 WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
2004 return Cost > Budget;
2005 }
2006 case scTruncate:
2007 case scPtrToInt:
2008 case scZeroExtend:
2009 case scSignExtend: {
2010 Cost +=
2011 costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
2012 return false; // Will answer upon next entry into this function.
2013 }
2014 case scUDivExpr: {
2015 // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
2016 // HowManyLessThans produced to compute a precise expression, rather than a
2017 // UDiv from the user's code. If we can't find a UDiv in the code with some
2018 // simple searching, we need to account for it's cost.
2019
2020 // At the beginning of this function we already tried to find existing
2021 // value for plain 'S'. Now try to lookup 'S + 1' since it is common
2022 // pattern involving division. This is just a simple search heuristic.
2024 SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
2025 return false; // Consider it to be free.
2026
2027 Cost +=
2028 costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2029 return false; // Will answer upon next entry into this function.
2030 }
2031 case scAddExpr:
2032 case scMulExpr:
2033 case scUMaxExpr:
2034 case scSMaxExpr:
2035 case scUMinExpr:
2036 case scSMinExpr:
2037 case scSequentialUMinExpr: {
2038 assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
2039 "Nary expr should have more than 1 operand.");
2040 // The simple nary expr will require one less op (or pair of ops)
2041 // than the number of it's terms.
2042 Cost +=
2043 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
2044 return Cost > Budget;
2045 }
2046 case scAddRecExpr: {
2047 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
2048 "Polynomial should be at least linear");
2049 Cost += costAndCollectOperands<SCEVAddRecExpr>(
2050 WorkItem, TTI, CostKind, Worklist);
2051 return Cost > Budget;
2052 }
2053 }
2054 llvm_unreachable("Unknown SCEV kind!");
2055}
2056
2058 Instruction *IP) {
2059 assert(IP);
2060 switch (Pred->getKind()) {
2062 return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
2064 return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
2065 case SCEVPredicate::P_Wrap: {
2066 auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
2067 return expandWrapPredicate(AddRecPred, IP);
2068 }
2069 }
2070 llvm_unreachable("Unknown SCEV predicate type");
2071}
2072
2074 Instruction *IP) {
2075 Value *Expr0 = expand(Pred->getLHS(), IP);
2076 Value *Expr1 = expand(Pred->getRHS(), IP);
2077
2078 Builder.SetInsertPoint(IP);
2079 auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
2080 auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
2081 return I;
2082}
2083
2085 Instruction *Loc, bool Signed) {
2086 assert(AR->isAffine() && "Cannot generate RT check for "
2087 "non-affine expression");
2088
2089 // FIXME: It is highly suspicious that we're ignoring the predicates here.
2091 const SCEV *ExitCount =
2093
2094 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2095
2096 const SCEV *Step = AR->getStepRecurrence(SE);
2097 const SCEV *Start = AR->getStart();
2098
2099 Type *ARTy = AR->getType();
2100 unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
2101 unsigned DstBits = SE.getTypeSizeInBits(ARTy);
2102
2103 // The expression {Start,+,Step} has nusw/nssw if
2104 // Step < 0, Start - |Step| * Backedge <= Start
2105 // Step >= 0, Start + |Step| * Backedge > Start
2106 // and |Step| * Backedge doesn't unsigned overflow.
2107
2108 Builder.SetInsertPoint(Loc);
2109 Value *TripCountVal = expand(ExitCount, Loc);
2110
2111 IntegerType *Ty =
2113
2114 Value *StepValue = expand(Step, Loc);
2115 Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
2116 Value *StartValue = expand(Start, Loc);
2117
2118 ConstantInt *Zero =
2119 ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
2120
2121 Builder.SetInsertPoint(Loc);
2122 // Compute |Step|
2123 Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
2124 Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
2125
2126 // Compute |Step| * Backedge
2127 // Compute:
2128 // 1. Start + |Step| * Backedge < Start
2129 // 2. Start - |Step| * Backedge > Start
2130 //
2131 // And select either 1. or 2. depending on whether step is positive or
2132 // negative. If Step is known to be positive or negative, only create
2133 // either 1. or 2.
2134 auto ComputeEndCheck = [&]() -> Value * {
2135 // Checking <u 0 is always false.
2136 if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
2137 return ConstantInt::getFalse(Loc->getContext());
2138
2139 // Get the backedge taken count and truncate or extended to the AR type.
2140 Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
2141
2142 Value *MulV, *OfMul;
2143 if (Step->isOne()) {
2144 // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2145 // needed, there is never an overflow, so to avoid artificially inflating
2146 // the cost of the check, directly emit the optimized IR.
2147 MulV = TruncTripCount;
2148 OfMul = ConstantInt::getFalse(MulV->getContext());
2149 } else {
2150 auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
2151 Intrinsic::umul_with_overflow, Ty);
2152 CallInst *Mul =
2153 Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
2154 MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
2155 OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
2156 }
2157
2158 Value *Add = nullptr, *Sub = nullptr;
2159 bool NeedPosCheck = !SE.isKnownNegative(Step);
2160 bool NeedNegCheck = !SE.isKnownPositive(Step);
2161
2162 if (isa<PointerType>(ARTy)) {
2163 Value *NegMulV = Builder.CreateNeg(MulV);
2164 if (NeedPosCheck)
2165 Add = Builder.CreatePtrAdd(StartValue, MulV);
2166 if (NeedNegCheck)
2167 Sub = Builder.CreatePtrAdd(StartValue, NegMulV);
2168 } else {
2169 if (NeedPosCheck)
2170 Add = Builder.CreateAdd(StartValue, MulV);
2171 if (NeedNegCheck)
2172 Sub = Builder.CreateSub(StartValue, MulV);
2173 }
2174
2175 Value *EndCompareLT = nullptr;
2176 Value *EndCompareGT = nullptr;
2177 Value *EndCheck = nullptr;
2178 if (NeedPosCheck)
2179 EndCheck = EndCompareLT = Builder.CreateICmp(
2181 if (NeedNegCheck)
2182 EndCheck = EndCompareGT = Builder.CreateICmp(
2183 Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
2184 if (NeedPosCheck && NeedNegCheck) {
2185 // Select the answer based on the sign of Step.
2186 EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
2187 }
2188 return Builder.CreateOr(EndCheck, OfMul);
2189 };
2190 Value *EndCheck = ComputeEndCheck();
2191
2192 // If the backedge taken count type is larger than the AR type,
2193 // check that we don't drop any bits by truncating it. If we are
2194 // dropping bits, then we have overflow (unless the step is zero).
2195 if (SrcBits > DstBits) {
2196 auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
2197 auto *BackedgeCheck =
2198 Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
2199 ConstantInt::get(Loc->getContext(), MaxVal));
2200 BackedgeCheck = Builder.CreateAnd(
2201 BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
2202
2203 EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
2204 }
2205
2206 return EndCheck;
2207}
2208
2210 Instruction *IP) {
2211 const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
2212 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2213
2214 // Add a check for NUSW
2216 NUSWCheck = generateOverflowCheck(A, IP, false);
2217
2218 // Add a check for NSSW
2220 NSSWCheck = generateOverflowCheck(A, IP, true);
2221
2222 if (NUSWCheck && NSSWCheck)
2223 return Builder.CreateOr(NUSWCheck, NSSWCheck);
2224
2225 if (NUSWCheck)
2226 return NUSWCheck;
2227
2228 if (NSSWCheck)
2229 return NSSWCheck;
2230
2231 return ConstantInt::getFalse(IP->getContext());
2232}
2233
2235 Instruction *IP) {
2236 // Loop over all checks in this set.
2237 SmallVector<Value *> Checks;
2238 for (const auto *Pred : Union->getPredicates()) {
2239 Checks.push_back(expandCodeForPredicate(Pred, IP));
2240 Builder.SetInsertPoint(IP);
2241 }
2242
2243 if (Checks.empty())
2244 return ConstantInt::getFalse(IP->getContext());
2245 return Builder.CreateOr(Checks);
2246}
2247
2248Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
2249 auto *DefI = dyn_cast<Instruction>(V);
2250 if (!PreserveLCSSA || !DefI)
2251 return V;
2252
2253 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
2254 Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent());
2255 Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent());
2256 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
2257 return V;
2258
2259 // Create a temporary instruction to at the current insertion point, so we
2260 // can hand it off to the helper to create LCSSA PHIs if required for the
2261 // new use.
2262 // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
2263 // would accept a insertion point and return an LCSSA phi for that
2264 // insertion point, so there is no need to insert & remove the temporary
2265 // instruction.
2266 Type *ToTy;
2267 if (DefI->getType()->isIntegerTy())
2268 ToTy = PointerType::get(DefI->getContext(), 0);
2269 else
2270 ToTy = Type::getInt32Ty(DefI->getContext());
2271 Instruction *User =
2272 CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt);
2273 auto RemoveUserOnExit =
2274 make_scope_exit([User]() { User->eraseFromParent(); });
2275
2277 ToUpdate.push_back(DefI);
2278 SmallVector<PHINode *, 16> PHIsToRemove;
2279 SmallVector<PHINode *, 16> InsertedPHIs;
2280 formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, &PHIsToRemove,
2281 &InsertedPHIs);
2282 for (PHINode *PN : InsertedPHIs)
2283 rememberInstruction(PN);
2284 for (PHINode *PN : PHIsToRemove) {
2285 if (!PN->use_empty())
2286 continue;
2287 InsertedValues.erase(PN);
2288 InsertedPostIncValues.erase(PN);
2289 PN->eraseFromParent();
2290 }
2291
2292 return User->getOperand(0);
2293}
2294
2295namespace {
2296// Search for a SCEV subexpression that is not safe to expand. Any expression
2297// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2298// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2299// instruction, but the important thing is that we prove the denominator is
2300// nonzero before expansion.
2301//
2302// IVUsers already checks that IV-derived expressions are safe. So this check is
2303// only needed when the expression includes some subexpression that is not IV
2304// derived.
2305//
2306// Currently, we only allow division by a value provably non-zero here.
2307//
2308// We cannot generally expand recurrences unless the step dominates the loop
2309// header. The expander handles the special case of affine recurrences by
2310// scaling the recurrence outside the loop, but this technique isn't generally
2311// applicable. Expanding a nested recurrence outside a loop requires computing
2312// binomial coefficients. This could be done, but the recurrence has to be in a
2313// perfectly reduced form, which can't be guaranteed.
2314struct SCEVFindUnsafe {
2315 ScalarEvolution &SE;
2316 bool CanonicalMode;
2317 bool IsUnsafe = false;
2318
2319 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2320 : SE(SE), CanonicalMode(CanonicalMode) {}
2321
2322 bool follow(const SCEV *S) {
2323 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
2324 if (!SE.isKnownNonZero(D->getRHS())) {
2325 IsUnsafe = true;
2326 return false;
2327 }
2328 }
2329 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2330 // For non-affine addrecs or in non-canonical mode we need a preheader
2331 // to insert into.
2332 if (!AR->getLoop()->getLoopPreheader() &&
2333 (!CanonicalMode || !AR->isAffine())) {
2334 IsUnsafe = true;
2335 return false;
2336 }
2337 }
2338 return true;
2339 }
2340 bool isDone() const { return IsUnsafe; }
2341};
2342} // namespace
2343
2345 SCEVFindUnsafe Search(SE, CanonicalMode);
2346 visitAll(S, Search);
2347 return !Search.IsUnsafe;
2348}
2349
2351 const Instruction *InsertionPoint) const {
2352 if (!isSafeToExpand(S))
2353 return false;
2354 // We have to prove that the expanded site of S dominates InsertionPoint.
2355 // This is easy when not in the same block, but hard when S is an instruction
2356 // to be expanded somewhere inside the same block as our insertion point.
2357 // What we really need here is something analogous to an OrderedBasicBlock,
2358 // but for the moment, we paper over the problem by handling two common and
2359 // cheap to check cases.
2360 if (SE.properlyDominates(S, InsertionPoint->getParent()))
2361 return true;
2362 if (SE.dominates(S, InsertionPoint->getParent())) {
2363 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2364 return true;
2365 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
2366 if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
2367 return true;
2368 }
2369 return false;
2370}
2371
2373 // Result is used, nothing to remove.
2374 if (ResultUsed)
2375 return;
2376
2377 // Restore original poison flags.
2378 for (auto [I, Flags] : Expander.OrigFlags)
2379 Flags.apply(I);
2380
2381 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2382#ifndef NDEBUG
2383 SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
2384 InsertedInstructions.end());
2385 (void)InsertedSet;
2386#endif
2387 // Remove sets with value handles.
2388 Expander.clear();
2389
2390 // Remove all inserted instructions.
2391 for (Instruction *I : reverse(InsertedInstructions)) {
2392#ifndef NDEBUG
2393 assert(all_of(I->users(),
2394 [&InsertedSet](Value *U) {
2395 return InsertedSet.contains(cast<Instruction>(U));
2396 }) &&
2397 "removed instruction should only be used by instructions inserted "
2398 "during expansion");
2399#endif
2400 assert(!I->getType()->isVoidTy() &&
2401 "inserted instruction should have non-void types");
2402 I->replaceAllUsesWith(PoisonValue::get(I->getType()));
2403 I->eraseFromParent();
2404 }
2405}
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static const Loop * PickMostRelevantLoop(const Loop *A, const Loop *B, DominatorTree &DT)
PickMostRelevantLoop - Given two loops pick the one that's most relevant for SCEV expansion.
static InstructionCost costAndCollectOperands(const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, SmallVectorImpl< SCEVOperand > &Worklist)
static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, const SCEVAddRecExpr *Requested, bool &InvertStep)
Check whether we can cheaply express the requested SCEV in terms of the available PHI SCEV by truncat...
#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallSet class.
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:77
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:185
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:179
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:438
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:414
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:209
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:167
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:694
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1104
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:785
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1084
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:2146
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
bool isNonIntegralPointerType(PointerType *PT) const
Definition: DataLayout.h:398
A debug info location.
Definition: DebugLoc.h:33
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
const BasicBlock & getEntryBlock() const
Definition: Function.h:800
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Definition: IRBuilder.cpp:88
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2037
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2514
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1090
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:173
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2031
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2533
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1974
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:172
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:218
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1719
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2395
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:143
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1342
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2019
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1473
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1325
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2005
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1495
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2159
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:178
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2410
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2151
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2349
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:514
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2664
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:476
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
const Instruction * getNextNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the next non-debug instruction in the same basic block as 'this',...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:473
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:444
bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc)
Checks if moving a specific instruction can break LCSSA in any loop.
Definition: LoopInfo.h:470
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
bool isComplete() const
If the PHI node is complete which means all of its parent's predecessors have incoming value in this ...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1814
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEV * getOperand() const
This class represents an assumption that the expression LHS Pred RHS evaluates to true,...
const SCEV * getRHS() const
Returns the right hand side of the predicate.
ICmpInst::Predicate getPredicate() const
const SCEV * getLHS() const
Returns the left hand side of the predicate.
This class represents a constant integer value.
Value * generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc, bool Signed)
Generates code that evaluates if the AR expression will overflow.
bool hasRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L)
Determine whether there is an existing expansion of S that can be reused.
SmallVector< Instruction *, 32 > getAllInsertedInstructions() const
Return a vector containing all instructions inserted during expansion.
bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint) const
Return true if the given expression is safe to expand in the sense that all materialized values are d...
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
Value * expandUnionPredicate(const SCEVUnionPredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos, bool RecomputePoisonFlags=false)
Utility for hoisting IncV (with all subexpressions requried for its computation) before InsertPos.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
bool isInsertedInstruction(Instruction *I) const
Return true if the specified instruction was inserted by the code rewriter.
Value * expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc)
Generates a code sequence that evaluates this predicate.
static bool canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi, PHINode *WidePhi, Instruction *OrigInc, Instruction *WideInc)
Return true if both increments directly increment the corresponding IV PHI nodes and have the same op...
Value * expandComparePredicate(const SCEVComparePredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
Value * expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
Instruction * getIVIncOperand(Instruction *IncV, Instruction *InsertPos, bool allowScale)
Return the induction variable increment's IV operand.
BasicBlock::iterator findInsertPointAfter(Instruction *I, Instruction *MustDominate) const
Returns a suitable insert point after I, that dominates MustDominate.
void setInsertPoint(Instruction *IP)
Set the current insertion point.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getOperand(unsigned i) const
ArrayRef< const SCEV * > operands() const
This class represents an assumption made using SCEV expressions which can be checked at run-time.
SCEVPredicateKind getKind() const
This class represents a cast from a pointer to a pointer-sized integer value.
This class represents a signed maximum selection.
This class represents a signed minimum selection.
This class represents a sequential/in-order unsigned minimum selection.
This class represents a sign extension of a small integer value to a larger integer value.
This class represents a truncation of an integer value to a smaller integer value.
This class represents a binary unsigned division operation.
const SCEV * getLHS() const
const SCEV * getRHS() const
This class represents an unsigned maximum selection.
This class represents an unsigned minimum selection.
This class represents a composition of other SCEV predicates, and is the class that most clients will...
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents the value of vscale, as used when defining the length of a scalable vector or r...
This class represents an assumption made on an AddRec expression.
const SCEVAddRecExpr * getExpr() const
Implementation of the SCEVPredicate interface.
IncrementWrapFlags getFlags() const
Returns the set assumed no overflow flags.
This class represents a zero extension of a small integer value to a larger integer value.
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
NoWrapFlags
NoWrapFlags are bitfield indices into SubclassData.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * removePointerBase(const SCEV *S)
Compute an expression equivalent to S - getPointerBase(S).
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTruncateOrNoop(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
bool containsAddRecurrence(const SCEV *S)
Return true if the SCEV is a scAddRecExpr or it contains scAddRecExpr.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
static SCEV::NoWrapFlags clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
std::optional< SCEV::NoWrapFlags > getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO)
Parse NSW/NUW flags from add/sub/mul IR binary operation Op into SCEV no-wrap flags,...
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
bool dominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV dominate the specified basic block.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask)
Convenient NoWrapFlags manipulation that hides enum casts and is visible in the ScalarEvolution name ...
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
bool canReuseInstruction(const SCEV *S, Instruction *I, SmallVectorImpl< Instruction * > &DropPoisonGeneratingInsts)
Check whether it is poison-safe to represent the expression S using the instruction I.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const SCEV * getPredicatedSymbolicMaxBackedgeTakenCount(const Loop *L, SmallVector< const SCEVPredicate *, 4 > &Predicates)
Similar to getSymbolicMaxBackedgeTakenCount, except it will add a set of SCEV predicates to Predicate...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
@ None
The cast is not used with a load/store of any kind.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1484
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:619
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:816
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:875
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:189
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void visitAll(const SCEV *Root, SV &Visitor)
Use SCEVTraversal to visit all nodes in the given expression tree.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
Definition: STLExtras.h:2400
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
@ Mul
Product of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:77
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
InstructionCost Cost
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
unsigned pred_size(const MachineBasicBlock *BB)
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
void apply(Instruction *I)
PoisonFlags(const Instruction *I)
struct for holding enough information to help calculate the cost of the given SCEV when expanded into...