//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution expander,
// which is used to generate the code corresponding to a given scalar evolution
// expression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
#else
#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
#endif

using namespace llvm;

cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
    "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
    cl::desc("When performing SCEV expansion only if it is cheap to do, this "
             "controls the budget that is considered cheap (default = 4)"));

using namespace PatternMatch;

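// PoisonFlags snapshots the poison-generating flags of an instruction
// (nuw/nsw, exact, disjoint, nneg, samesign, and GEP no-wrap flags) so that
// apply() can restore them after the expander has temporarily dropped them.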
PoisonFlags::PoisonFlags(const Instruction *I) {
  NUW = false;
  NSW = false;
  Exact = false;
  Disjoint = false;
  NNeg = false;
  SameSign = false;
  GEPNW = GEPNoWrapFlags::none();
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
    NUW = OBO->hasNoUnsignedWrap();
    NSW = OBO->hasNoSignedWrap();
  }
  if (auto *PEO = dyn_cast<PossiblyExactOperator>(I))
    Exact = PEO->isExact();
  if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
    Disjoint = PDI->isDisjoint();
  if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
    NNeg = PNI->hasNonNeg();
  if (auto *TI = dyn_cast<TruncInst>(I)) {
    NUW = TI->hasNoUnsignedWrap();
    NSW = TI->hasNoSignedWrap();
  }
  if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
    GEPNW = GEP->getNoWrapFlags();
  if (auto *ICmp = dyn_cast<ICmpInst>(I))
    SameSign = ICmp->hasSameSign();
}

void PoisonFlags::apply(Instruction *I) {
  if (isa<OverflowingBinaryOperator>(I)) {
    I->setHasNoUnsignedWrap(NUW);
    I->setHasNoSignedWrap(NSW);
  }
  if (isa<PossiblyExactOperator>(I))
    I->setIsExact(Exact);
  if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
    PDI->setIsDisjoint(Disjoint);
  if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
    PNI->setNonNeg(NNeg);
  if (isa<TruncInst>(I)) {
    I->setHasNoUnsignedWrap(NUW);
    I->setHasNoSignedWrap(NSW);
  }
  if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
    GEP->setNoWrapFlags(GEPNW);
  if (auto *ICmp = dyn_cast<ICmpInst>(I))
    ICmp->setSameSign(SameSign);
}

/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
/// reusing an existing cast if a suitable one (= dominating IP) exists, or
/// creating a new one.
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
                                       Instruction::CastOps Op,
                                       BasicBlock::iterator IP) {
  // This function must be called with the builder having a valid insertion
  // point. It doesn't need to be the actual IP where the uses of the returned
  // cast will be added, but it must dominate such IP.
  // We use this precondition to produce a cast that will dominate all its
  // uses. In particular, this is crucial for the case where the builder's
  // insertion point *is* the point where we were asked to put the cast.
  // Since we don't know the builder's insertion point is actually
  // where the uses will be added (only that it dominates it), we are
  // not allowed to move it.
  BasicBlock::iterator BIP = Builder.GetInsertPoint();

  Value *Ret = nullptr;

  // Check to see if there is already a cast!
  for (User *U : V->users()) {
    if (U->getType() != Ty)
      continue;
    CastInst *CI = dyn_cast<CastInst>(U);
    if (!CI || CI->getOpcode() != Op)
      continue;

    // Found a suitable cast that is at IP or comes before IP. Use it. Note
    // that the cast must also properly dominate the Builder's insertion point.
    if (IP->getParent() == CI->getParent() && &*BIP != CI &&
        (&*IP == CI || CI->comesBefore(&*IP))) {
      Ret = CI;
      break;
    }
  }

  // Create a new cast.
  if (!Ret) {
    SCEVInsertPointGuard Guard(Builder, this);
    Builder.SetInsertPoint(&*IP);
    Ret = Builder.CreateCast(Op, V, Ty, V->getName());
  }

  // We assert at the end of the function since IP might point to an
  // instruction with different dominance properties than a cast
  // (an invoke for example) and not dominate BIP (but the cast does).
  assert(!isa<Instruction>(Ret) ||
         SE.DT.dominates(cast<Instruction>(Ret), &*BIP));

  return Ret;
}

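/// Find a valid insertion point after \p I: past any phis, outside EH pads
/// (for an invoke, in its normal destination), and after instructions already
/// inserted by the expander, without skipping over \p MustDominate itself.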
BasicBlock::iterator
SCEVExpander::findInsertPointAfter(Instruction *I,
                                   Instruction *MustDominate) const {
  BasicBlock::iterator IP = ++I->getIterator();
  if (auto *II = dyn_cast<InvokeInst>(I))
    IP = II->getNormalDest()->begin();

  while (isa<PHINode>(IP))
    ++IP;

  if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
    ++IP;
  } else if (isa<CatchSwitchInst>(IP)) {
    IP = MustDominate->getParent()->getFirstInsertionPt();
  } else {
    assert(!IP->isEHPad() && "unexpected eh pad!");
  }

  // Adjust insert point to be after instructions inserted by the expander, so
  // we can re-use already inserted instructions. Avoid skipping past the
  // original \p MustDominate, in case it is an inserted instruction.
  while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
    ++IP;

  return IP;
}

BasicBlock::iterator
SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
  // Cast the argument at the beginning of the entry block, after
  // any bitcasts of other arguments.
  if (Argument *A = dyn_cast<Argument>(V)) {
    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
    while ((isa<BitCastInst>(IP) &&
            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
            cast<BitCastInst>(IP)->getOperand(0) != A) ||
           isa<DbgInfoIntrinsic>(IP))
      ++IP;
    return IP;
  }

  // Cast the instruction immediately after the instruction.
  if (Instruction *I = dyn_cast<Instruction>(V))
    return findInsertPointAfter(I, &*Builder.GetInsertPoint());

  // Otherwise, this must be some kind of constant,
  // so let's plop this cast into the function's entry block.
  assert(isa<Constant>(V) &&
         "Expected the cast argument to be a global/constant");
  return Builder.GetInsertBlock()
      ->getParent()
      ->getEntryBlock()
      .begin();
}

/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  assert((Op == Instruction::BitCast ||
          Op == Instruction::PtrToInt ||
          Op == Instruction::IntToPtr) &&
         "InsertNoopCastOfTo cannot perform non-noop casts!");
  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
         "InsertNoopCastOfTo cannot change sizes!");

  // inttoptr only works for integral pointers. For non-integral pointers, we
  // can create a GEP on null with the integral value as index. Note that
  // it is safe to use GEP of null instead of inttoptr here, because only
  // expressions already based on a GEP of null should be converted to pointers
  // during expansion.
  if (Op == Instruction::IntToPtr) {
    auto *PtrTy = cast<PointerType>(Ty);
    if (DL.isNonIntegralPointerType(PtrTy))
      return Builder.CreatePtrAdd(Constant::getNullValue(PtrTy), V, "scevgep");
  }
  // Short-circuit unnecessary bitcasts.
  if (Op == Instruction::BitCast) {
    if (V->getType() == Ty)
      return V;
    if (CastInst *CI = dyn_cast<CastInst>(V)) {
      if (CI->getOperand(0)->getType() == Ty)
        return CI->getOperand(0);
    }
  }
  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
    if (CastInst *CI = dyn_cast<CastInst>(V))
      if ((CI->getOpcode() == Instruction::PtrToInt ||
           CI->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CI->getType()) ==
              SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
        return CI->getOperand(0);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      if ((CE->getOpcode() == Instruction::PtrToInt ||
           CE->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CE->getType()) ==
              SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
        return CE->getOperand(0);
  }

  // Fold a cast of a constant.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(Op, C, Ty);

  // Try to reuse existing cast, or insert one.
  return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
}

/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation, and hoisting
/// to an outer loop when the opportunity is there and it is safe.
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
                                 Value *LHS, Value *RHS,
                                 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
  // Fold a binop with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(LHS))
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
        return Res;

  // Do a quick scan to see if we have this binop nearby. If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;

      auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
        // Ensure that no-wrap flags match.
        if (isa<OverflowingBinaryOperator>(I)) {
          if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
            return true;
          if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
            return true;
        }
        // Conservatively, do not reuse any instruction that has the exact
        // flag set.
        if (isa<PossiblyExactOperator>(I) && I->isExact())
          return true;
        return false;
      };
      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
          IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
  SCEVInsertPointGuard Guard(Builder, this);

  if (IsSafeToHoist) {
    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }
  }

  // If we haven't found this binop, insert it.
  // TODO: Use the Builder, which will make CreateBinOp below fold with
  // InstSimplifyFolder.
  Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
  BO->setDebugLoc(Loc);
  if (Flags & SCEV::FlagNUW)
    BO->setHasNoUnsignedWrap();
  if (Flags & SCEV::FlagNSW)
    BO->setHasNoSignedWrap();

  return BO;
}

/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
/// for getelementptr vs. inttoptr in
/// http://llvm.org/docs/LangRef.html#pointeraliasing
/// for details.
///
/// Design note: The correctness of using getelementptr here depends on
/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
/// they may introduce pointer arithmetic which may not be safely converted
/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
/// aren't, it might seem desirable to emit multiple GEPs, keeping the
/// loop-invariant portions of the overall computation outside the loop.
/// However, there are a few reasons this is not done here. Hoisting simple
/// arithmetic is a low-level optimization that often isn't very
/// important until late in the optimization process. In fact, passes
/// like InstructionCombining will combine GEPs, even if it means
/// pushing loop-invariant computation down into loops, so even if the
/// GEPs were split here, the work would quickly be undone. The
/// LoopStrengthReduction pass, which is usually run quite late (and
/// after the last InstructionCombining pass), takes care of hoisting
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V,
                                    SCEV::NoWrapFlags Flags) {
  assert(!isa<Instruction>(V) ||
         SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));

  Value *Idx = expand(Offset);
  GEPNoWrapFlags NW = (Flags & SCEV::FlagNUW) ? GEPNoWrapFlags::noUnsignedWrap()
                                              : GEPNoWrapFlags::none();

  // Fold a GEP with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(V))
    if (Constant *CRHS = dyn_cast<Constant>(Idx))
      return Builder.CreatePtrAdd(CLHS, CRHS, "", NW);

  // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;
      if (auto *GEP = dyn_cast<GetElementPtrInst>(IP)) {
        if (GEP->getPointerOperand() == V &&
            GEP->getSourceElementType() == Builder.getInt8Ty() &&
            GEP->getOperand(1) == Idx) {
          rememberFlags(GEP);
          GEP->setNoWrapFlags(GEP->getNoWrapFlags() & NW);
          return &*IP;
        }
      }
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Move the insertion point out of as many loops as we can.
  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;

    // Ok, move up a level.
    Builder.SetInsertPoint(Preheader->getTerminator());
  }

  // Emit a GEP.
  return Builder.CreatePtrAdd(V, Idx, "scevgep", NW);
}

/// PickMostRelevantLoop - Given two loops pick the one that's most relevant
/// for SCEV expansion. If they are nested, this is the most nested. If they
/// are neighboring, pick the later.
static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
                                        DominatorTree &DT) {
  if (!A) return B;
  if (!B) return A;
  if (A->contains(B)) return B;
  if (B->contains(A)) return A;
  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  return A; // Arbitrarily break the tie.
}

/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  // Test whether we've already computed the most relevant loop for this SCEV.
  auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
  if (!Pair.second)
    return Pair.first->second;

  switch (S->getSCEVType()) {
  case scConstant:
  case scVScale:
    return nullptr; // A constant has no relevant loops.
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
  case scPtrToInt:
  case scAddExpr:
  case scMulExpr:
  case scUDivExpr:
  case scAddRecExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    const Loop *L = nullptr;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
      L = AR->getLoop();
    for (const SCEV *Op : S->operands())
      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
    return RelevantLoops[S] = L;
  }
  case scUnknown: {
    const SCEVUnknown *U = cast<SCEVUnknown>(S);
    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
      return Pair.first->second = SE.LI.getLoopFor(I->getParent());
    // A non-instruction has no relevant loops.
    return nullptr;
  }
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unexpected SCEV type!");
}

namespace {

/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
  DominatorTree &DT;
public:
  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}

  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
                  std::pair<const Loop *, const SCEV *> RHS) const {
    // Keep pointer operands sorted at the end.
    if (LHS.second->getType()->isPointerTy() !=
        RHS.second->getType()->isPointerTy())
      return LHS.second->getType()->isPointerTy();

    // Compare loops with PickMostRelevantLoop.
    if (LHS.first != RHS.first)
      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;

    // If one operand is a non-constant negative and the other is not,
    // put the non-constant negative on the right so that a sub can
    // be used instead of a negate and add.
    if (LHS.second->isNonConstantNegative()) {
      if (!RHS.second->isNonConstantNegative())
        return false;
    } else if (RHS.second->isNonConstantNegative())
      return true;

    // Otherwise they are equivalent according to this comparison.
    return false;
  }
};

}

Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  // Recognize the canonical representation of an unsimplified urem.
  const SCEV *URemLHS = nullptr;
  const SCEV *URemRHS = nullptr;
  if (SE.matchURem(S, URemLHS, URemRHS)) {
    Value *LHS = expand(URemLHS);
    Value *RHS = expand(URemRHS);
    return InsertBinop(Instruction::URem, LHS, RHS, SCEV::FlagAnyWrap,
                       /*IsSafeToHoist*/ false);
  }

  // Collect all the add operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal, and
  // so that pointer operands are inserted first, which the code below relies
  // on to form more involved GEPs.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (const SCEV *Op : reverse(S->operands()))
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));

  // Sort by loop. Use a stable sort so that constants follow non-constants and
  // pointer operands precede non-pointer operands.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to add all the operands. Hoist as much as possible
  // out of loops, and form meaningful getelementptrs where possible.
  Value *Sum = nullptr;
  for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
    const Loop *CurLoop = I->first;
    const SCEV *Op = I->second;
    if (!Sum) {
      // This is the first operand. Just expand it.
      Sum = expand(Op);
      ++I;
      continue;
    }

    assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
    if (isa<PointerType>(Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      SmallVector<const SCEV *, 4> NewOps;
      for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is a SCEVUnknown that is not an instruction, peek
        // through it, to enable more of it to be folded into the GEP.
        const SCEV *X = I->second;
        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
          if (!isa<Instruction>(U->getValue()))
            X = SE.getSCEV(U->getValue());
        NewOps.push_back(X);
      }
      Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum, S->getNoWrapFlags());
    } else if (Op->isNonConstantNegative()) {
      // Instead of doing a negate and add, just do a subtract.
      Value *W = expand(SE.getNegativeSCEV(Op));
      Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
                        /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple add.
      Value *W = expand(Op);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Sum))
        std::swap(Sum, W);
      Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
                        /*IsSafeToHoist*/ true);
      ++I;
    }
  }

  return Sum;
}

Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = S->getType();

  // Collect all the mul operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (const SCEV *Op : reverse(S->operands()))
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));

  // Sort by loop. Use a stable sort so that constants follow non-constants.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to mul all the operands. Hoist as much as possible
  // out of loops.
  Value *Prod = nullptr;
  auto I = OpsAndLoops.begin();

  // Expand the calculation of X pow N in the following manner:
  // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
  // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
  const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
    auto E = I;
    // Calculate how many times the same operand from the same loop is included
    // into this power.
    uint64_t Exponent = 0;
    const uint64_t MaxExponent = UINT64_MAX >> 1;
    // No one sane will ever try to calculate such huge exponents, but if we
    // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
    // below when the power of 2 exceeds our Exponent, and we want it to be
    // 1u << 31 at most to not deal with unsigned overflow.
    while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
      ++Exponent;
      ++E;
    }
    assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");

    // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of
    // them that are needed into the result.
    Value *P = expand(I->second);
    Value *Result = nullptr;
    if (Exponent & 1)
      Result = P;
    for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
      P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
                      /*IsSafeToHoist*/ true);
      if (Exponent & BinExp)
        Result = Result ? InsertBinop(Instruction::Mul, Result, P,
                                      SCEV::FlagAnyWrap,
                                      /*IsSafeToHoist*/ true)
                        : P;
    }

    I = E;
    assert(Result && "Nothing was expanded?");
    return Result;
  };

  while (I != OpsAndLoops.end()) {
    if (!Prod) {
      // This is the first operand. Just expand it.
      Prod = ExpandOpBinPowN();
    } else if (I->second->isAllOnesValue()) {
      // Instead of doing a multiply by negative one, just do a negate.
      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple mul.
      Value *W = ExpandOpBinPowN();
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Prod)) std::swap(Prod, W);
      const APInt *RHS;
      if (match(W, m_Power2(RHS))) {
        // Canonicalize Prod*(1<<C) to Prod<<C.
        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
        auto NWFlags = S->getNoWrapFlags();
        // Clear the nsw flag if the shl would produce a poison value.
        if (RHS->logBase2() == RHS->getBitWidth() - 1)
          NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
        Prod = InsertBinop(Instruction::Shl, Prod,
                           ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
                           /*IsSafeToHoist*/ true);
      } else {
        Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
                           /*IsSafeToHoist*/ true);
      }
    }
  }

  return Prod;
}

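/// Expand a udiv: a divisor that is a constant power of two becomes an lshr.
/// In SafeUDivMode the divisor is frozen and clamped to at least 1 with umax,
/// since the division may be expanded speculatively and must not divide by
/// zero or by (frozen) poison.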
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Value *LHS = expand(S->getLHS());
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
    const APInt &RHS = SC->getAPInt();
    if (RHS.isPowerOf2())
      return InsertBinop(Instruction::LShr, LHS,
                         ConstantInt::get(SC->getType(), RHS.logBase2()),
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
  }

  const SCEV *RHSExpr = S->getRHS();
  Value *RHS = expand(RHSExpr);
  if (SafeUDivMode) {
    bool GuaranteedNotPoison =
        ScalarEvolution::isGuaranteedNotToBePoison(RHSExpr);
    if (!GuaranteedNotPoison)
      RHS = Builder.CreateFreeze(RHS);

    // We need an umax if either RHSExpr is not known to be non-zero, or if it
    // is not guaranteed to be non-poison. In the latter case, the frozen
    // poison may be 0.
    if (!SE.isKnownNonZero(RHSExpr) || !GuaranteedNotPoison)
      RHS = Builder.CreateIntrinsic(RHS->getType(), Intrinsic::umax,
                                    {RHS, ConstantInt::get(RHS->getType(), 1)});
  }
  return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
                     /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}

/// Determine if this is a well-behaved chain of instructions leading back to
/// the PHI. If so, it may be reused by expanded expressions.
bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                         const Loop *L) {
  if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
      (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
    return false;
  // If any of the operands don't dominate the insert position, bail.
  // Addrec operands are always loop-invariant, so this can only happen
  // if there are instructions which haven't been hoisted.
  if (L == IVIncInsertLoop) {
    for (Use &Op : llvm::drop_begin(IncV->operands()))
      if (Instruction *OInst = dyn_cast<Instruction>(Op))
        if (!SE.DT.dominates(OInst, IVIncInsertPos))
          return false;
  }
  // Advance to the next instruction.
  IncV = dyn_cast<Instruction>(IncV->getOperand(0));
  if (!IncV)
    return false;

  if (IncV->mayHaveSideEffects())
    return false;

  if (IncV == PN)
    return true;

  return isNormalAddRecExprPHI(PN, IncV, L);
}

/// getIVIncOperand returns an induction variable increment's induction
/// variable operand.
///
/// If allowScale is set, any type of GEP is allowed as long as the nonIV
/// operands dominate InsertPos.
///
/// If allowScale is not set, ensure that a GEP increment conforms to one of
/// the simple patterns generated by getAddRecExprPHILiterally and
/// expandAddToGEP. If the pattern isn't recognized, return NULL.
Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
                                           Instruction *InsertPos,
                                           bool allowScale) {
  if (IncV == InsertPos)
    return nullptr;

  switch (IncV->getOpcode()) {
  default:
    return nullptr;
  // Check for a simple Add/Sub or GEP of a loop invariant step.
  case Instruction::Add:
  case Instruction::Sub: {
    Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
    if (!OInst || SE.DT.dominates(OInst, InsertPos))
      return dyn_cast<Instruction>(IncV->getOperand(0));
    return nullptr;
  }
  case Instruction::BitCast:
    return dyn_cast<Instruction>(IncV->getOperand(0));
  case Instruction::GetElementPtr:
    for (Use &U : llvm::drop_begin(IncV->operands())) {
      if (isa<Constant>(U))
        continue;
      if (Instruction *OInst = dyn_cast<Instruction>(U)) {
        if (!SE.DT.dominates(OInst, InsertPos))
          return nullptr;
      }
      if (allowScale) {
        // Allow any kind of GEP as long as it can be hoisted.
        continue;
      }
      // GEPs produced by SCEVExpander use i8 element type.
      if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
        return nullptr;
      break;
    }
    return dyn_cast<Instruction>(IncV->getOperand(0));
  }
}

/// If the insert point of the current builder or any of the builders on the
/// stack of saved builders has 'I' as its insert point, update it to point to
/// the instruction after 'I'. This is intended to be used when the instruction
/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
/// different block, the inconsistent insert point (with a mismatched
/// Instruction and Block) can lead to an instruction being inserted in a block
/// other than its parent.
void SCEVExpander::fixupInsertPoints(Instruction *I) {
  BasicBlock::iterator It(*I);
  BasicBlock::iterator NewInsertPt = std::next(It);
  if (Builder.GetInsertPoint() == It)
    Builder.SetInsertPoint(&*NewInsertPt);
  for (auto *InsertPtGuard : InsertPointGuards)
    if (InsertPtGuard->GetInsertPoint() == It)
      InsertPtGuard->SetInsertPoint(NewInsertPt);
}

/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
/// it available to other uses in this loop. Recursively hoist any operands,
/// until we reach a value that dominates InsertPos.
bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
                              bool RecomputePoisonFlags) {
  auto FixupPoisonFlags = [this](Instruction *I) {
    // Drop flags that are potentially inferred from the old context and infer
    // flags in the new context.
    rememberFlags(I);
    I->dropPoisonGeneratingFlags();
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
      if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
        auto *BO = cast<BinaryOperator>(I);
        BO->setHasNoUnsignedWrap(
            ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
        BO->setHasNoSignedWrap(
            ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
      }
  };

  if (SE.DT.dominates(IncV, InsertPos)) {
    if (RecomputePoisonFlags)
      FixupPoisonFlags(IncV);
    return true;
  }

  // InsertPos must itself dominate IncV so that IncV's new position satisfies
  // its existing users.
  if (isa<PHINode>(InsertPos) ||
      !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
    return false;

  if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
    return false;

  // Check that the chain of IV operands leading back to Phi can be hoisted.
  SmallVector<Instruction *, 4> IVIncs;
  for (;;) {
    Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
    if (!Oper)
      return false;
    // IncV is safe to hoist.
    IVIncs.push_back(IncV);
    IncV = Oper;
    if (SE.DT.dominates(IncV, InsertPos))
      break;
  }
  for (Instruction *I : llvm::reverse(IVIncs)) {
    fixupInsertPoints(I);
    I->moveBefore(InsertPos);
    if (RecomputePoisonFlags)
      FixupPoisonFlags(I);
  }
  return true;
}
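
/// Return true if the widened increment \p WideInc mirrors the original
/// increment \p OrigInc: both are binary ops of the same opcode with their
/// respective phi as an operand. Callers use this to decide whether flags of
/// the original IV increment carry over to the widened one.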
854
856 PHINode *WidePhi,
857 Instruction *OrigInc,
858 Instruction *WideInc) {
859 return match(OrigInc, m_c_BinOp(m_Specific(OrigPhi), m_Value())) &&
860 match(WideInc, m_c_BinOp(m_Specific(WidePhi), m_Value())) &&
861 OrigInc->getOpcode() == WideInc->getOpcode();
862}

/// Determine if this cyclic phi is in a form that would have been generated by
/// LSR. We don't care if the phi was actually expanded in this pass, as long
/// as it is in a low-cost form, for example, no implied multiplication. This
/// should match any patterns generated by getAddRecExprPHILiterally and
/// expandAddToGEP.
bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                           const Loop *L) {
  for (Instruction *IVOper = IncV;
       (IVOper = getIVIncOperand(IVOper,
                                 L->getLoopPreheader()->getTerminator(),
                                 /*allowScale=*/false));) {
    if (IVOper == PN)
      return true;
  }
  return false;
}

/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
/// need to materialize IV increments elsewhere to handle difficult situations.
Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
                                 bool useSubtract) {
  Value *IncV;
  // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
  if (PN->getType()->isPointerTy()) {
    // TODO: Change name to IVName.iv.next.
    IncV = Builder.CreatePtrAdd(PN, StepV, "scevgep");
  } else {
    IncV = useSubtract ?
      Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
      Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
  }
  return IncV;
}

/// Check whether we can cheaply express the requested SCEV in terms of
/// the available PHI SCEV by truncation and/or inversion of the step.
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
                                    const SCEVAddRecExpr *Phi,
                                    const SCEVAddRecExpr *Requested,
                                    bool &InvertStep) {
  // We can't transform to match a pointer PHI.
  Type *PhiTy = Phi->getType();
  Type *RequestedTy = Requested->getType();
  if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
    return false;

  if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
    return false;

  // Try truncating it if necessary.
  Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
  if (!Phi)
    return false;

  // Check whether truncation will help.
  if (Phi == Requested) {
    InvertStep = false;
    return true;
  }

  // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
  if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
    InvertStep = true;
    return true;
  }

  return false;
}

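// The increment of {A,+,S} is known not to wrap in the signed sense iff
// computing it in a type twice as wide and sign-extending afterwards agrees
// with sign-extending the operands first: sext(A + S) == sext(A) + sext(S).
// ScalarEvolution folds both wide expressions to the same SCEV exactly when
// it can prove that equality.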
static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
                                            SE.getSignExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
      SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

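// Same check as IsIncrementNSW, using zero-extension to test for unsigned
// wrap.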
static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
                                            SE.getZeroExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
      SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                        const Loop *L, Type *&TruncTy,
                                        bool &InvertStep) {
  assert((!IVIncInsertLoop || IVIncInsertPos) &&
         "Uninitialized insert position");

  // Reuse a previously-inserted PHI, if present.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (LatchBlock) {
    PHINode *AddRecPhiMatch = nullptr;
    Instruction *IncV = nullptr;
    TruncTy = nullptr;
    InvertStep = false;

    // Only try partially matching SCEVs that need truncation and/or
    // step-inversion if we know this loop is outside the current loop.
    bool TryNonMatchingSCEV =
        IVIncInsertLoop &&
        SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());

    for (PHINode &PN : L->getHeader()->phis()) {
      if (!SE.isSCEVable(PN.getType()))
        continue;

      // We should not look for an incomplete PHI. Getting SCEV for an
      // incomplete PHI has no meaning at all.
      if (!PN.isComplete()) {
        SCEV_DEBUG_WITH_TYPE(
            DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
        continue;
      }

      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
      if (!PhiSCEV)
        continue;

      bool IsMatchingSCEV = PhiSCEV == Normalized;
      // We only handle truncation and inversion of phi recurrences for the
      // expanded expression if the expanded expression's loop dominates the
      // loop we insert to. Check now, so we can bail out early.
      if (!IsMatchingSCEV && !TryNonMatchingSCEV)
        continue;

      // TODO: this possibly can be reworked to avoid this cast at all.
      Instruction *TempIncV =
          dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
      if (!TempIncV)
        continue;

      // Check whether we can reuse this PHI node.
      if (LSRMode) {
        if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
          continue;
      } else {
        if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
          continue;
      }

      // Stop if we have found an exact match SCEV.
      if (IsMatchingSCEV) {
        IncV = TempIncV;
        TruncTy = nullptr;
        InvertStep = false;
        AddRecPhiMatch = &PN;
        break;
      }

      // Try whether the phi can be translated into the requested form
      // (truncated and/or offset by a constant).
      if ((!TruncTy || InvertStep) &&
          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
        // Record the phi node. But don't stop; we might find an exact match
        // later.
        AddRecPhiMatch = &PN;
        IncV = TempIncV;
        TruncTy = Normalized->getType();
      }
    }

    if (AddRecPhiMatch) {
      // Ok, the add recurrence looks usable.
      // Remember this PHI, even in post-inc mode.
      InsertedValues.insert(AddRecPhiMatch);
      // Remember the increment.
      rememberInstruction(IncV);
      // Those values were not actually inserted but re-used.
      ReusedValues.insert(AddRecPhiMatch);
      ReusedValues.insert(IncV);
      return AddRecPhiMatch;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Another AddRec may need to be recursively expanded below. For example, if
  // this AddRec is quadratic, the StepV may itself be an AddRec in this
  // loop. Remove this loop from the PostIncLoops set before expanding such
  // AddRecs. Otherwise, we cannot find a valid position for the step
  // (i.e. StepV can never dominate its loop header). Ideally, we could do
  // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
  // so it's not worth implementing SmallPtrSet::swap.
  PostIncLoopSet SavedPostIncLoops = PostIncLoops;
  PostIncLoops.clear();

  // Expand code for the start value into the loop preheader.
  assert(L->getLoopPreheader() &&
         "Can't expand add recurrences without a loop preheader!");
  Value *StartV =
      expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());

  // StartV must have been inserted into L's preheader to dominate the new
  // phi.
  assert(!isa<Instruction>(StartV) ||
         SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
                                 L->getHeader()));

  // Expand code for the step value. Do this before creating the PHI so that
  // PHI reuse code doesn't see an incomplete PHI.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  Type *ExpandTy = Normalized->getType();
  // If the stride is negative, insert a sub instead of an add for the
  // increment (unless it's a constant, because subtracts of constants are
  // canonicalized to adds).
  bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  if (useSubtract)
    Step = SE.getNegativeSCEV(Step);
  // Expand the step somewhere that dominates the loop header.
  Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());

  // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
  // we actually do emit an addition. It does not apply if we emit a
  // subtraction.
  bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
  bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);

  // Create the PHI.
  BasicBlock *Header = L->getHeader();
  Builder.SetInsertPoint(Header, Header->begin());
  PHINode *PN =
      Builder.CreatePHI(ExpandTy, pred_size(Header), Twine(IVName) + ".iv");

  // Create the step instructions and populate the PHI.
  for (BasicBlock *Pred : predecessors(Header)) {
    // Add a start value.
    if (!L->contains(Pred)) {
      PN->addIncoming(StartV, Pred);
      continue;
    }

    // Create a step value and add it to the PHI.
    // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert
    // the instructions at IVIncInsertPos.
    Instruction *InsertPos = L == IVIncInsertLoop ?
      IVIncInsertPos : Pred->getTerminator();
    Builder.SetInsertPoint(InsertPos);
    Value *IncV = expandIVInc(PN, StepV, L, useSubtract);

    if (isa<OverflowingBinaryOperator>(IncV)) {
      if (IncrementIsNUW)
        cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
      if (IncrementIsNSW)
        cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
    }
    PN->addIncoming(IncV, Pred);
  }

  // After expanding subexpressions, restore the PostIncLoops set so the caller
  // can ensure that IVIncrement dominates the current uses.
  PostIncLoops = SavedPostIncLoops;

  // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
  // effective when we are able to use an IV inserted here, so record it.
  InsertedValues.insert(PN);
  InsertedIVs.push_back(PN);
  return PN;
}

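/// Expand the addrec without rewriting it in terms of a canonical IV:
/// materialize (or reuse) a phi and increment in the addrec's own loop, then
/// apply any post-inc adjustment, truncation, or step inversion that the
/// reused phi requires.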
Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  const Loop *L = S->getLoop();

  // Determine a normalized form of this expression, which is the expression
  // before any post-inc adjustment is made.
  const SCEVAddRecExpr *Normalized = S;
  if (PostIncLoops.count(L)) {
    PostIncLoopSet Loops;
    Loops.insert(L);
    Normalized = cast<SCEVAddRecExpr>(
        normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
  }

  [[maybe_unused]] const SCEV *Start = Normalized->getStart();
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  assert(SE.properlyDominates(Start, L->getHeader()) &&
         "Start does not properly dominate loop header");
  assert(SE.dominates(Step, L->getHeader()) &&
         "Step does not dominate loop header");

  // In some cases, we decide to reuse an existing phi node but need to
  // truncate it and/or invert the step.
  Type *TruncTy = nullptr;
  bool InvertStep = false;
  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);

  // Accommodate post-inc mode, if necessary.
  Value *Result;
  if (!PostIncLoops.count(L))
    Result = PN;
  else {
    // In PostInc mode, use the post-incremented value.
    BasicBlock *LatchBlock = L->getLoopLatch();
    assert(LatchBlock && "PostInc mode requires a unique loop latch!");
    Result = PN->getIncomingValueForBlock(LatchBlock);

    // We might be introducing a new use of the post-inc IV that is not poison
    // safe, in which case we should drop poison generating flags. Only keep
    // those flags for which SCEV has proven that they always hold.
    if (isa<OverflowingBinaryOperator>(Result)) {
      auto *I = cast<Instruction>(Result);
      if (!S->hasNoUnsignedWrap())
        I->setHasNoUnsignedWrap(false);
      if (!S->hasNoSignedWrap())
        I->setHasNoSignedWrap(false);
    }

    // For an expansion to use the postinc form, the client must call
    // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
    // or dominated by IVIncInsertPos.
    if (isa<Instruction>(Result) &&
        !SE.DT.dominates(cast<Instruction>(Result),
                         &*Builder.GetInsertPoint())) {
      // The induction variable's postinc expansion does not dominate this use.
      // IVUsers tries to prevent this case, so it is rare. However, it can
      // happen when an IVUser outside the loop is not dominated by the latch
      // block. Adjusting IVIncInsertPos before expansion begins cannot handle
      // all cases. Consider a phi outside whose operand is replaced during
      // expansion with the value of the postinc user. Without fundamentally
      // changing the way postinc users are tracked, the only remedy is
      // inserting an extra IV increment. StepV might fold into PostLoopOffset,
      // but hopefully expandCodeFor handles that.
      bool useSubtract =
          !S->getType()->isPointerTy() && Step->isNonConstantNegative();
      if (useSubtract)
        Step = SE.getNegativeSCEV(Step);
      Value *StepV;
      {
        // Expand the step somewhere that dominates the loop header.
        SCEVInsertPointGuard Guard(Builder, this);
        StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
      }
      Result = expandIVInc(PN, StepV, L, useSubtract);
    }
  }

  // We have decided to reuse an induction variable of a dominating loop. Apply
  // truncation and/or inversion of the step.
  if (TruncTy) {
    // Truncate the result.
    if (TruncTy != Result->getType())
      Result = Builder.CreateTrunc(Result, TruncTy);

    // Invert the result.
    if (InvertStep)
      Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
  }

  return Result;
}

Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
  // In canonical mode we compute the addrec as an expression of a canonical IV
  // using evaluateAtIteration and expand the resulting SCEV expression. This
  // way we avoid introducing new IVs to carry on the computation of the addrec
  // throughout the loop.
  //
  // For nested addrecs evaluateAtIteration might need a canonical IV of a
  // type wider than the addrec itself. Emitting a canonical IV of the
  // proper type might produce non-legal types, for example expanding an i64
  // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
  // back to non-canonical mode for nested addrecs.
  if (!CanonicalMode || (S->getNumOperands() > 2))
    return expandAddRecExprLiterally(S);

  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  const Loop *L = S->getLoop();

  // First check for an existing canonical IV in a suitable type.
  PHINode *CanonicalIV = nullptr;
  if (PHINode *PN = L->getCanonicalInductionVariable())
    if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
      CanonicalIV = PN;

  // Rewrite an AddRec in terms of the canonical induction variable, if
  // its type is more narrow.
  if (CanonicalIV &&
      SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
      !S->getType()->isPointerTy()) {
    SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
    for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
      NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType());
    Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
                                       S->getNoWrapFlags(SCEV::FlagNW)));
    BasicBlock::iterator NewInsertPt =
        findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
    V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
    return V;
  }

  // {X,+,F} --> X + {0,+,F}
  if (!S->getStart()->isZero()) {
    if (isa<PointerType>(S->getType())) {
      Value *StartV = expand(SE.getPointerBase(S));
      return expandAddToGEP(SE.removePointerBase(S), StartV,
                            S->getNoWrapFlags(SCEV::FlagNUW));
    }

    SmallVector<const SCEV *, 4> NewOps(S->operands());
    NewOps[0] = SE.getConstant(Ty, 0);
    const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
                                        S->getNoWrapFlags(SCEV::FlagNW));

    // Just do a normal add. Pre-expand the operands to suppress folding.
    //
    // The LHS and RHS values are factored out of the expand call to make the
    // output independent of the argument evaluation order.
    const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
    const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
    return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
  }

  // If we don't yet have a canonical IV, create one.
  if (!CanonicalIV) {
    // Create and insert the PHI node for the induction variable in the
    // specified loop.
    BasicBlock *Header = L->getHeader();
    pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
    CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
    CanonicalIV->insertBefore(Header->begin());
    rememberInstruction(CanonicalIV);

    SmallSet<BasicBlock *, 4> PredSeen;
    Constant *One = ConstantInt::get(Ty, 1);
    for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
      BasicBlock *HP = *HPI;
      if (!PredSeen.insert(HP).second) {
        // There must be an incoming value for each predecessor, even the
        // duplicates!
        CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
        continue;
      }

      if (L->contains(HP)) {
        // Insert a unit add instruction right before the terminator
        // corresponding to the back-edge.
        Instruction *Add =
            BinaryOperator::CreateAdd(CanonicalIV, One, "indvar.next",
                                      HP->getTerminator()->getIterator());
        Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
        rememberInstruction(Add);
        CanonicalIV->addIncoming(Add, HP);
      } else {
        CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
      }
    }
  }

  // {0,+,1} --> Insert a canonical induction variable into the loop!
  if (S->isAffine() && S->getOperand(1)->isOne()) {
    assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
           "IVs with types different from the canonical IV should "
           "already have been handled!");
    return CanonicalIV;
  }

  // {0,+,F} --> {0,+,1} * F

  // If this is a simple linear addrec, emit it now as a special case.
  if (S->isAffine()) // {0,+,F} --> i*F
    return
      expand(SE.getTruncateOrNoop(
        SE.getMulExpr(SE.getUnknown(CanonicalIV),
                      SE.getNoopOrAnyExtend(S->getOperand(1),
                                            CanonicalIV->getType())),
        Ty));

  // If this is a chain of recurrences, turn it into a closed form, using the
  // folders, then expandCodeFor the closed form. This allows the folders to
  // simplify the expression without having to build a bunch of special code
  // into this folder.
  const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.

  // Promote S up to the canonical IV type, if the cast is foldable.
  const SCEV *NewS = S;
  const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
  if (isa<SCEVAddRecExpr>(Ext))
    NewS = Ext;

  const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);

  // Truncate the result down to the original type, if needed.
  const SCEV *T = SE.getTruncateOrNoop(V, Ty);
  return expand(T);
}

Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
  Value *V = expand(S->getOperand());
  return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
                           GetOptimalInsertionPointForCastOf(V));
}

Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateTrunc(V, S->getType());
}

Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateZExt(V, S->getType(), "",
                            SE.isKnownNonNegative(S->getOperand()));
}

Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateSExt(V, S->getType());
}

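/// Expand an n-ary min/max as a chain of intrinsic calls (icmp+select for
/// pointer-typed operands). For a sequential umin, every operand except the
/// first is frozen, and udivs inside them are expanded in SafeUDivMode,
/// because the flattened expansion evaluates operands that the sequential
/// (short-circuiting) semantics might never have evaluated.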
Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
                                      Intrinsic::ID IntrinID, Twine Name,
                                      bool IsSequential) {
  bool PrevSafeMode = SafeUDivMode;
  SafeUDivMode |= IsSequential;
  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
  Type *Ty = LHS->getType();
  if (IsSequential)
    LHS = Builder.CreateFreeze(LHS);
  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
    SafeUDivMode = (IsSequential && i != 0) || PrevSafeMode;
    Value *RHS = expand(S->getOperand(i));
    if (IsSequential && i != 0)
      RHS = Builder.CreateFreeze(RHS);
    Value *Sel;
    if (Ty->isIntegerTy())
      Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
                                    /*FMFSource=*/nullptr, Name);
    else {
      Value *ICmp =
          Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS);
      Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name);
    }
    LHS = Sel;
  }
  SafeUDivMode = PrevSafeMode;
  return LHS;
}

Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
  return expandMinMaxExpr(S, Intrinsic::smax, "smax");
}

Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
  return expandMinMaxExpr(S, Intrinsic::umax, "umax");
}

Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
  return expandMinMaxExpr(S, Intrinsic::smin, "smin");
}

Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
  return expandMinMaxExpr(S, Intrinsic::umin, "umin");
}

Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
  return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
}

Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
  return Builder.CreateVScale(ConstantInt::get(S->getType(), 1));
}

Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
                                   BasicBlock::iterator IP) {
  setInsertPoint(IP);
  Value *V = expandCodeFor(SH, Ty);
  return V;
}

Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
  // Expand the code for this SCEV.
  Value *V = expand(SH);

  if (Ty) {
    assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
           "non-trivial casts should be done with the SCEVs directly!");
    V = InsertNoopCastOfTo(V, Ty);
  }
  return V;
}

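/// Search ScalarEvolution's map of values known to equal \p S for an
/// instruction that dominates \p InsertPt and can be reused. Instructions
/// whose poison-generating flags would have to be dropped to make the reuse
/// sound are collected in \p DropPoisonGeneratingInsts. Returns null if no
/// suitable value exists.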
Value *SCEVExpander::FindValueInExprValueMap(
    const SCEV *S, const Instruction *InsertPt,
    SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
  // If the expansion is not in CanonicalMode, and the SCEV contains any
  // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
  if (!CanonicalMode && SE.containsAddRecurrence(S))
    return nullptr;

  // If S is a constant or unknown, it may be worse to reuse an existing Value.
  if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
    return nullptr;

  for (Value *V : SE.getSCEVValues(S)) {
    Instruction *EntInst = dyn_cast<Instruction>(V);
    if (!EntInst)
      continue;

    // Choose a Value from the set which dominates the InsertPt.
    // InsertPt should be inside the Value's parent loop so as not to break
    // the LCSSA form.
    assert(EntInst->getFunction() == InsertPt->getFunction());
    if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) ||
        !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
          SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
      continue;

    // Make sure reusing the instruction is poison-safe.
    if (SE.canReuseInstruction(S, EntInst, DropPoisonGeneratingInsts))
      return V;
    DropPoisonGeneratingInsts.clear();
  }
  return nullptr;
}
1495
1496// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1497// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1498// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1499// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1500// the expansion will try to reuse Value from ExprValueMap, and only when it
1501// fails, expand the SCEV literally.
1502Value *SCEVExpander::expand(const SCEV *S) {
1503 // Compute an insertion point for this SCEV object. Hoist the instructions
1504 // as far out in the loop nest as possible.
1505 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1506
1507 // We can move insertion point only if there is no div or rem operations
1508 // otherwise we are risky to move it over the check for zero denominator.
1509 auto SafeToHoist = [](const SCEV *S) {
1510 return !SCEVExprContains(S, [](const SCEV *S) {
1511 if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1512 if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1513 // Division by non-zero constants can be hoisted.
1514 return SC->getValue()->isZero();
1515 // All other divisions should not be moved as they may be
1516 // divisions by zero and should be kept within the
1517 // conditions of the surrounding loops that guard their
1518 // execution (see PR35406).
1519 return true;
1520 }
1521 return false;
1522 });
1523 };
1524 if (SafeToHoist(S)) {
1525 for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1526 L = L->getParentLoop()) {
1527 if (SE.isLoopInvariant(S, L)) {
1528 if (!L) break;
1529 if (BasicBlock *Preheader = L->getLoopPreheader()) {
1530 InsertPt = Preheader->getTerminator()->getIterator();
1531 } else {
1532 // LSR sets the insertion point for AddRec start/step values to the
1533 // block start to simplify value reuse, even though it's an invalid
1534 // position. SCEVExpander must correct for this in all cases.
1535 InsertPt = L->getHeader()->getFirstInsertionPt();
1536 }
1537 } else {
1538 // If the SCEV is computable at this level, insert it into the header
1539 // after the PHIs (and after any other instructions that we've inserted
1540 // there) so that it is guaranteed to dominate any user inside the loop.
1541 if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
1542 InsertPt = L->getHeader()->getFirstInsertionPt();
1543
1544 while (InsertPt != Builder.GetInsertPoint() &&
1545 (isInsertedInstruction(&*InsertPt) ||
1546 isa<DbgInfoIntrinsic>(&*InsertPt))) {
1547 InsertPt = std::next(InsertPt);
1548 }
1549 break;
1550 }
1551 }
1552 }
1553
1554 // Check to see if we already expanded this here.
1555 auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt));
1556 if (I != InsertedExpressions.end())
1557 return I->second;
1558
1559 SCEVInsertPointGuard Guard(Builder, this);
1560 Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1561
1562 // Expand the expression into instructions.
1563 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1564 Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts);
1565 if (!V) {
1566 V = visit(S);
1567 V = fixupLCSSAFormFor(V);
1568 } else {
1569 for (Instruction *I : DropPoisonGeneratingInsts) {
1570 rememberFlags(I);
1571 I->dropPoisonGeneratingAnnotations();
1572 // See if we can re-infer from first principles any of the flags we just
1573 // dropped.
1574 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
1575 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1576 auto *BO = cast<BinaryOperator>(I);
1577 BO->setHasNoUnsignedWrap(
1578 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
1579 BO->setHasNoSignedWrap(
1580 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
1581 }
1582 if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
1583 auto *Src = NNI->getOperand(0);
1584 if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
1585 Constant::getNullValue(Src->getType()), I,
1586 DL).value_or(false))
1587 NNI->setNonNeg(true);
1588 }
1589 }
1590 }
1591 // Remember the expanded value for this SCEV at this location.
1592 //
1593 // This is independent of PostIncLoops. The mapped value simply materializes
1594 // the expression at this insertion point. If the mapped value happened to be
1595 // a postinc expansion, it could be reused by a non-postinc user, but only if
1596 // its insertion point was already at the head of the loop.
1597 InsertedExpressions[std::make_pair(S, &*InsertPt)] = V;
1598 return V;
1599}
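A small sketch of the memoization this provides (hypothetical caller; assumes S is safe to expand and IP is a legal insertion point):

  Expander.setInsertPoint(IP);
  Value *V1 = Expander.expandCodeFor(S, S->getType());
  Value *V2 = Expander.expandCodeFor(S, S->getType());
  // Both calls resolve to the same InsertedExpressions entry, so V1 == V2
  // and no duplicate instructions are emitted.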
1600
1601void SCEVExpander::rememberInstruction(Value *I) {
1602 auto DoInsert = [this](Value *V) {
1603 if (!PostIncLoops.empty())
1604 InsertedPostIncValues.insert(V);
1605 else
1606 InsertedValues.insert(V);
1607 };
1608 DoInsert(I);
1609}
1610
1611void SCEVExpander::rememberFlags(Instruction *I) {
1612 // If we already have flags for the instruction, keep the existing ones.
1613 OrigFlags.try_emplace(I, PoisonFlags(I));
1614}
1615
1616void SCEVExpander::replaceCongruentIVInc(
1617 PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
1618 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1619 BasicBlock *LatchBlock = L->getLoopLatch();
1620 if (!LatchBlock)
1621 return;
1622
1623 Instruction *OrigInc =
1624 dyn_cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1625 Instruction *IsomorphicInc =
1626 dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
1627 if (!OrigInc || !IsomorphicInc)
1628 return;
1629
1630 // If this phi has the same width but is more canonical, replace the
1631 // original with it. As part of the "more canonical" determination,
1632 // respect a prior decision to use an IV chain.
1633 if (OrigPhi->getType() == Phi->getType() &&
1634 !(ChainedPhis.count(Phi) ||
1635 isExpandedAddRecExprPHI(OrigPhi, OrigInc, L)) &&
1636 (ChainedPhis.count(Phi) ||
1637 isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
1638 std::swap(OrigPhi, Phi);
1639 std::swap(OrigInc, IsomorphicInc);
1640 }
1641
1642 // Replacing the congruent phi is sufficient because acyclic
1643 // redundancy elimination, CSE/GVN, should handle the
1644 // rest. However, once SCEV proves that a phi is congruent,
1645 // it's often the head of an IV user cycle that is isomorphic
1646 // with the original phi. It's worth eagerly cleaning up the
1647 // common case of a single IV increment so that DeleteDeadPHIs
1648 // can remove cycles that had postinc uses.
1649 // Because we may potentially introduce a new use of OrigIV that didn't
1650 // exist before at this point, its poison flags need readjustment.
1651 const SCEV *TruncExpr =
1652 SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
1653 if (OrigInc == IsomorphicInc || TruncExpr != SE.getSCEV(IsomorphicInc) ||
1654 !SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc))
1655 return;
1656
1657 bool BothHaveNUW = false;
1658 bool BothHaveNSW = false;
1659 auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(OrigInc);
1660 auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(IsomorphicInc);
1661 if (OBOIncV && OBOIsomorphic) {
1662 BothHaveNUW =
1663 OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
1664 BothHaveNSW =
1665 OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
1666 }
1667
1668 if (!hoistIVInc(OrigInc, IsomorphicInc,
1669 /*RecomputePoisonFlags*/ true))
1670 return;
1671
1672 // We are replacing with a wider increment. If both OrigInc and IsomorphicInc
1673 // are NUW/NSW, then we can preserve them on the wider increment; the narrower
1674 // IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
1675 // make IsomorphicInc's uses more poisonous.
1676 assert(OrigInc->getType()->getScalarSizeInBits() >=
1677 IsomorphicInc->getType()->getScalarSizeInBits() &&
1678 "Should only replace an increment with a wider one.");
1679 if (BothHaveNUW || BothHaveNSW) {
1680 OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() || BothHaveNUW);
1681 OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() || BothHaveNSW);
1682 }
1683
1684 SCEV_DEBUG_WITH_TYPE(DebugType,
1685 dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1686 << *IsomorphicInc << '\n');
1687 Value *NewInc = OrigInc;
1688 if (OrigInc->getType() != IsomorphicInc->getType()) {
1689 BasicBlock::iterator IP;
1690 if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
1691 IP = PN->getParent()->getFirstInsertionPt();
1692 else
1693 IP = OrigInc->getNextNonDebugInstruction()->getIterator();
1694
1695 IRBuilder<> Builder(IP->getParent(), IP);
1696 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1697 NewInc =
1698 Builder.CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
1699 }
1700 IsomorphicInc->replaceAllUsesWith(NewInc);
1701 DeadInsts.emplace_back(IsomorphicInc);
1702}
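A worked example of the rewrite above (illustrative IR only):

  // With a wide increment and a congruent narrow one in the latch,
  //   %inc64 = add nuw nsw i64 %iv64, 1
  //   %inc32 = add nuw nsw i32 %iv32, 1
  // the narrow increment is replaced by a truncation of the wide one,
  //   %inc32.trunc = trunc i64 %inc64 to i32
  // %inc32 is queued on DeadInsts, and nuw/nsw stay on %inc64 only because
  // both originals carried them (BothHaveNUW/BothHaveNSW).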
1703
1704/// replaceCongruentIVs - Check for congruent phis in this loop header and
1705/// replace them with their most canonical representative. Return the number of
1706/// phis eliminated.
1707///
1708/// This does not depend on any SCEVExpander state but should be used in
1709/// the same context that SCEVExpander is used.
1710unsigned
1711SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
1712 SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1713 const TargetTransformInfo *TTI) {
1714 // Find integer phis in order of increasing width.
1715 SmallVector<PHINode *, 8> Phis;
1716 for (PHINode &PN : L->getHeader()->phis())
1717 Phis.push_back(&PN);
1718
1719 if (TTI)
1720 // Use stable_sort to preserve order of equivalent PHIs, so the order
1721 // of the sorted Phis is the same from run to run on the same loop.
1722 llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
1723 // Put pointers at the back and make sure pointer < pointer = false.
1724 if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1725 return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1726 return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
1727 LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
1728 });
1729
1730 unsigned NumElim = 0;
1731 DenseMap<const SCEV *, PHINode *> ExprToIVMap;
1732 // Process phis from wide to narrow. Map wide phis to their truncation
1733 // so narrow phis can reuse them.
1734 for (PHINode *Phi : Phis) {
1735 auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1736 if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
1737 return V;
1738 if (!SE.isSCEVable(PN->getType()))
1739 return nullptr;
1740 auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
1741 if (!Const)
1742 return nullptr;
1743 return Const->getValue();
1744 };
1745
1746 // Fold constant phis. They may be congruent to other constant phis and
1747 // would confuse the logic below that expects proper IVs.
1748 if (Value *V = SimplifyPHINode(Phi)) {
1749 if (V->getType() != Phi->getType())
1750 continue;
1751 SE.forgetValue(Phi);
1752 Phi->replaceAllUsesWith(V);
1753 DeadInsts.emplace_back(Phi);
1754 ++NumElim;
1755 SCEV_DEBUG_WITH_TYPE(DebugType,
1756 dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1757 << '\n');
1758 continue;
1759 }
1760
1761 if (!SE.isSCEVable(Phi->getType()))
1762 continue;
1763
1764 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
1765 if (!OrigPhiRef) {
1766 OrigPhiRef = Phi;
1767 if (Phi->getType()->isIntegerTy() && TTI &&
1768 TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
1769 // Make sure we only rewrite using simple induction variables;
1770 // otherwise, we can make the trip count of a loop unanalyzable
1771 // to SCEV.
1772 const SCEV *PhiExpr = SE.getSCEV(Phi);
1773 if (isa<SCEVAddRecExpr>(PhiExpr)) {
1774 // This phi can be freely truncated to the narrowest phi type. Map the
1775 // truncated expression to it so it will be reused for narrow types.
1776 const SCEV *TruncExpr =
1777 SE.getTruncateExpr(PhiExpr, Phis.back()->getType());
1778 ExprToIVMap[TruncExpr] = Phi;
1779 }
1780 }
1781 continue;
1782 }
1783
1784 // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1785 // sense.
1786 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1787 continue;
1788
1789 replaceCongruentIVInc(Phi, OrigPhiRef, L, DT, DeadInsts);
1790 SCEV_DEBUG_WITH_TYPE(DebugType,
1791 dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1792 << '\n');
1793 SCEV_DEBUG_WITH_TYPE(
1794 DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
1795 ++NumElim;
1796 Value *NewIV = OrigPhiRef;
1797 if (OrigPhiRef->getType() != Phi->getType()) {
1798 IRBuilder<> Builder(L->getHeader(),
1799 L->getHeader()->getFirstInsertionPt());
1800 Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1801 NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
1802 }
1803 Phi->replaceAllUsesWith(NewIV);
1804 DeadInsts.emplace_back(Phi);
1805 }
1806 return NumElim;
1807}
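A sketch of how an IndVarSimplify-style client might drive this (hypothetical names; DT and TTI come from the caller's analyses):

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  unsigned NumElim = Expander.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
  (void)NumElim; // statistics only
  // Clean up whatever became trivially dead once the congruent phis died.
  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);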
1808
1809bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1810 const Instruction *At,
1811 Loop *L) {
1812 using namespace llvm::PatternMatch;
1813
1814 SmallVector<BasicBlock *, 4> ExitingBlocks;
1815 L->getExitingBlocks(ExitingBlocks);
1816
1817 // Look for a suitable value in simple conditions at the loop exits.
1818 for (BasicBlock *BB : ExitingBlocks) {
1819 CmpPredicate Pred;
1820 Instruction *LHS, *RHS;
1821
1822 if (!match(BB->getTerminator(),
1823 m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
1824 m_BasicBlock(), m_BasicBlock())))
1825 continue;
1826
1827 if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
1828 return true;
1829
1830 if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
1831 return true;
1832 }
1833
1834 // Use expand's logic which is used for reusing a previous Value in
1835 // ExprValueMap. Note that we don't currently model the cost of
1836 // needing to drop poison generating flags on the instruction if we
1837 // want to reuse it. We effectively assume that has zero cost.
1838 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1839 return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
1840}
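An illustrative instance of the exit-condition match above (hypothetical IR):

  // If an exiting block of L ends in
  //   br i1 (icmp ult i64 %i.next, %n), label %body, label %exit
  // then any S equal to SE.getSCEV(%i.next) or SE.getSCEV(%n), with the
  // matched value dominating At, is reported as having a related expansion,
  // and the cost model below treats it as free.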
1841
1842template<typename T> static InstructionCost costAndCollectOperands(
1843 const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
1844 TargetTransformInfo::TargetCostKind CostKind,
1845 SmallVectorImpl<SCEVOperand> &Worklist) {
1846
1847 const T *S = cast<T>(WorkItem.S);
1848 InstructionCost Cost = 0;
1849 // Object to help map SCEV operands to expanded IR instructions.
1850 struct OperationIndices {
1851 OperationIndices(unsigned Opc, size_t min, size_t max) :
1852 Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1853 unsigned Opcode;
1854 size_t MinIdx;
1855 size_t MaxIdx;
1856 };
1857
1858 // Collect the operations of all the instructions that will be needed to
1859 // expand the SCEVExpr. This is so that when we come to cost the operands,
1860 // we know what the generated user(s) will be.
1861 SmallVector<OperationIndices, 2> Operations;
1862
1863 auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1864 Operations.emplace_back(Opcode, 0, 0);
1865 return TTI.getCastInstrCost(Opcode, S->getType(),
1866 S->getOperand(0)->getType(),
1867 TTI::CastContextHint::None, CostKind);
1868 };
1869
1870 auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1871 unsigned MinIdx = 0,
1872 unsigned MaxIdx = 1) -> InstructionCost {
1873 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1874 return NumRequired *
1875 TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
1876 };
1877
1878 auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1879 unsigned MaxIdx) -> InstructionCost {
1880 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1881 Type *OpType = S->getType();
1882 return NumRequired * TTI.getCmpSelInstrCost(
1883 Opcode, OpType, CmpInst::makeCmpResultType(OpType),
1884 CmpInst::BAD_ICMP_PREDICATE, CostKind);
1885 };
1886
1887 switch (S->getSCEVType()) {
1888 case scCouldNotCompute:
1889 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1890 case scUnknown:
1891 case scConstant:
1892 case scVScale:
1893 return 0;
1894 case scPtrToInt:
1895 Cost = CastCost(Instruction::PtrToInt);
1896 break;
1897 case scTruncate:
1898 Cost = CastCost(Instruction::Trunc);
1899 break;
1900 case scZeroExtend:
1901 Cost = CastCost(Instruction::ZExt);
1902 break;
1903 case scSignExtend:
1904 Cost = CastCost(Instruction::SExt);
1905 break;
1906 case scUDivExpr: {
1907 unsigned Opcode = Instruction::UDiv;
1908 if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1909 if (SC->getAPInt().isPowerOf2())
1910 Opcode = Instruction::LShr;
1911 Cost = ArithCost(Opcode, 1);
1912 break;
1913 }
1914 case scAddExpr:
1915 Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
1916 break;
1917 case scMulExpr:
1918 // TODO: this is a very pessimistic cost model for Mul, because the
1919 // expander actually uses the binary powering (Bin Pow) algorithm,
1920 // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
1921 Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
1922 break;
1923 case scSMaxExpr:
1924 case scUMaxExpr:
1925 case scSMinExpr:
1926 case scUMinExpr:
1927 case scSequentialUMinExpr: {
1928 // FIXME: should this ask the cost for Intrinsics?
1929 // The reduction tree.
1930 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
1931 Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
1932 switch (S->getSCEVType()) {
1933 case scSequentialUMinExpr: {
1934 // The safety net against poison.
1935 // FIXME: this is broken.
1936 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
1937 Cost += ArithCost(Instruction::Or,
1938 S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
1939 Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
1940 break;
1941 }
1942 default:
1943 assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
1944 "Unhandled SCEV expression type?");
1945 break;
1946 }
1947 break;
1948 }
1949 case scAddRecExpr: {
1950 // Addrec expands to a phi and add per recurrence.
1951 unsigned NumRecurrences = S->getNumOperands() - 1;
1952 Cost += TTI.getCFInstrCost(Instruction::PHI, CostKind) * NumRecurrences;
1953 Cost +=
1954 TTI.getArithmeticInstrCost(Instruction::Add, S->getType(), CostKind) *
1955 NumRecurrences;
1956 // AR start is used in phi.
1957 Worklist.emplace_back(Instruction::PHI, 0, S->getOperand(0));
1958 // Other operands are used in add.
1959 for (const SCEV *Op : S->operands().drop_front())
1960 Worklist.emplace_back(Instruction::Add, 1, Op);
1961 break;
1962 }
1963 }
1964
1965 for (auto &CostOp : Operations) {
1966 for (auto SCEVOp : enumerate(S->operands())) {
1967 // Clamp the index to account for multiple IR operations being chained.
1968 size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
1969 size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
1970 Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
1971 }
1972 }
1973 return Cost;
1974}
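A worked example of the accounting above (illustrative): for S = (5 + %a + %b + %c), an scAddExpr with four operands,

  // ArithCost(Instruction::Add, 3) charges three adds, since an n-ary add
  // chains n-1 IR adds. Each of the four operands is then pushed onto
  // Worklist tagged with Instruction::Add and a clamped operand index, so
  // the constant 5 can later be priced via TTI.getIntImmCostInst in
  // isHighCostExpansionHelper.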
1975
1976bool SCEVExpander::isHighCostExpansionHelper(
1977 const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
1978 InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
1979 SmallPtrSetImpl<const SCEV *> &Processed,
1980 SmallVectorImpl<SCEVOperand> &Worklist) {
1981 if (Cost > Budget)
1982 return true; // Already run out of budget, give up.
1983
1984 const SCEV *S = WorkItem.S;
1985 // Was the cost of expansion of this expression already accounted for?
1986 if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
1987 return false; // We have already accounted for this expression.
1988
1989 // If we can find an existing value for this scev available at the point "At"
1990 // then consider the expression cheap.
1991 if (hasRelatedExistingExpansion(S, &At, L))
1992 return false; // Consider the expression to be free.
1993
1994 TargetTransformInfo::TargetCostKind CostKind =
1995 L->getHeader()->getParent()->hasMinSize()
1996 ? TargetTransformInfo::TCK_CodeSize
1997 : TargetTransformInfo::TCK_RecipThroughput;
1998
1999 switch (S->getSCEVType()) {
2000 case scCouldNotCompute:
2001 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
2002 case scUnknown:
2003 case scVScale:
2004 // Assume to be zero-cost.
2005 return false;
2006 case scConstant: {
2007 // Only evaluate the costs of constants when optimizing for size.
2008 if (CostKind != TargetTransformInfo::TCK_CodeSize)
2009 return false;
2010 const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
2011 Type *Ty = S->getType();
2012 Cost += TTI.getIntImmCostInst(
2013 WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
2014 return Cost > Budget;
2015 }
2016 case scTruncate:
2017 case scPtrToInt:
2018 case scZeroExtend:
2019 case scSignExtend: {
2020 Cost +=
2021 costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
2022 return false; // Will answer upon next entry into this function.
2023 }
2024 case scUDivExpr: {
2025 // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
2026 // HowManyLessThans produced to compute a precise expression, rather than a
2027 // UDiv from the user's code. If we can't find a UDiv in the code with some
2028 // simple searching, we need to account for it's cost.
2029
2030 // At the beginning of this function we already tried to find an existing
2031 // value for plain 'S'. Now try to look up 'S + 1', since it is a common
2032 // pattern involving division. This is just a simple search heuristic.
2033 if (hasRelatedExistingExpansion(
2034 SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
2035 return false; // Consider it to be free.
2036
2037 Cost +=
2038 costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2039 return false; // Will answer upon next entry into this function.
2040 }
2041 case scAddExpr:
2042 case scMulExpr:
2043 case scUMaxExpr:
2044 case scSMaxExpr:
2045 case scUMinExpr:
2046 case scSMinExpr:
2047 case scSequentialUMinExpr: {
2048 assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
2049 "Nary expr should have more than 1 operand.");
2050 // The simple nary expr will require one less op (or pair of ops)
2051 // than the number of its terms.
2052 Cost +=
2053 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
2054 return Cost > Budget;
2055 }
2056 case scAddRecExpr: {
2057 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
2058 "Polynomial should be at least linear");
2059 Cost += costAndCollectOperands<SCEVAddRecExpr>(
2060 WorkItem, TTI, CostKind, Worklist);
2061 return Cost > Budget;
2062 }
2063 }
2064 llvm_unreachable("Unknown SCEV kind!");
2065}
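This helper is driven by the public SCEVExpander::isHighCostExpansion. A sketch of a typical guard in a hypothetical client:

  // Skip a runtime-check transform when materializing the trip count would
  // exceed the cl::opt-controlled cheap-expansion budget.
  if (Expander.isHighCostExpansion({TripCount}, L, SCEVCheapExpansionBudget,
                                   &TTI, InsertPt))
    return false;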
2066
2067Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
2068 Instruction *IP) {
2069 assert(IP);
2070 switch (Pred->getKind()) {
2071 case SCEVPredicate::P_Union:
2072 return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
2073 case SCEVPredicate::P_Compare:
2074 return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
2075 case SCEVPredicate::P_Wrap: {
2076 auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
2077 return expandWrapPredicate(AddRecPred, IP);
2078 }
2079 }
2080 llvm_unreachable("Unknown SCEV predicate type");
2081}
2082
2083Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
2084 Instruction *IP) {
2085 Value *Expr0 = expand(Pred->getLHS(), IP);
2086 Value *Expr1 = expand(Pred->getRHS(), IP);
2087
2088 Builder.SetInsertPoint(IP);
2089 auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
2090 auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
2091 return I;
2092}
2093
2094Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
2095 Instruction *Loc, bool Signed) {
2096 assert(AR->isAffine() && "Cannot generate RT check for "
2097 "non-affine expression");
2098
2099 // FIXME: It is highly suspicious that we're ignoring the predicates here.
2100 SmallVector<const SCEVPredicate *, 4> Pred;
2101 const SCEV *ExitCount =
2102 SE.getPredicatedSymbolicMaxBackedgeTakenCount(AR->getLoop(), Pred);
2103
2104 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2105
2106 const SCEV *Step = AR->getStepRecurrence(SE);
2107 const SCEV *Start = AR->getStart();
2108
2109 Type *ARTy = AR->getType();
2110 unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
2111 unsigned DstBits = SE.getTypeSizeInBits(ARTy);
2112
2113 // The expression {Start,+,Step} has nusw/nssw if
2114 // Step < 0, Start - |Step| * Backedge <= Start
2115 // Step >= 0, Start + |Step| * Backedge > Start
2116 // and |Step| * Backedge doesn't unsigned overflow.
2117
2118 Builder.SetInsertPoint(Loc);
2119 Value *TripCountVal = expand(ExitCount, Loc);
2120
2121 IntegerType *Ty =
2122 IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
2123
2124 Value *StepValue = expand(Step, Loc);
2125 Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
2126 Value *StartValue = expand(Start, Loc);
2127
2128 ConstantInt *Zero =
2129 ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
2130
2131 Builder.SetInsertPoint(Loc);
2132 // Compute |Step|
2133 Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
2134 Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
2135
2136 // Compute |Step| * Backedge
2137 // Compute:
2138 // 1. Start + |Step| * Backedge < Start
2139 // 2. Start - |Step| * Backedge > Start
2140 //
2141 // And select either 1. or 2. depending on whether step is positive or
2142 // negative. If Step is known to be positive or negative, only create
2143 // either 1. or 2.
2144 auto ComputeEndCheck = [&]() -> Value * {
2145 // Checking <u 0 is always false.
2146 if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
2147 return ConstantInt::getFalse(Loc->getContext());
2148
2149 // Get the backedge taken count and truncate or extend it to the AR type.
2150 Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
2151
2152 Value *MulV, *OfMul;
2153 if (Step->isOne()) {
2154 // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2155 // needed, there is never an overflow, so to avoid artificially inflating
2156 // the cost of the check, directly emit the optimized IR.
2157 MulV = TruncTripCount;
2158 OfMul = ConstantInt::getFalse(MulV->getContext());
2159 } else {
2160 CallInst *Mul = Builder.CreateIntrinsic(Intrinsic::umul_with_overflow, Ty,
2161 {AbsStep, TruncTripCount},
2162 /*FMFSource=*/nullptr, "mul");
2163 MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
2164 OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
2165 }
2166
2167 Value *Add = nullptr, *Sub = nullptr;
2168 bool NeedPosCheck = !SE.isKnownNegative(Step);
2169 bool NeedNegCheck = !SE.isKnownPositive(Step);
2170
2171 if (isa<PointerType>(ARTy)) {
2172 Value *NegMulV = Builder.CreateNeg(MulV);
2173 if (NeedPosCheck)
2174 Add = Builder.CreatePtrAdd(StartValue, MulV);
2175 if (NeedNegCheck)
2176 Sub = Builder.CreatePtrAdd(StartValue, NegMulV);
2177 } else {
2178 if (NeedPosCheck)
2179 Add = Builder.CreateAdd(StartValue, MulV);
2180 if (NeedNegCheck)
2181 Sub = Builder.CreateSub(StartValue, MulV);
2182 }
2183
2184 Value *EndCompareLT = nullptr;
2185 Value *EndCompareGT = nullptr;
2186 Value *EndCheck = nullptr;
2187 if (NeedPosCheck)
2188 EndCheck = EndCompareLT = Builder.CreateICmp(
2189 Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
2190 if (NeedNegCheck)
2191 EndCheck = EndCompareGT = Builder.CreateICmp(
2192 Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
2193 if (NeedPosCheck && NeedNegCheck) {
2194 // Select the answer based on the sign of Step.
2195 EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
2196 }
2197 return Builder.CreateOr(EndCheck, OfMul);
2198 };
2199 Value *EndCheck = ComputeEndCheck();
2200
2201 // If the backedge taken count type is larger than the AR type,
2202 // check that we don't drop any bits by truncating it. If we are
2203 // dropping bits, then we have overflow (unless the step is zero).
2204 if (SrcBits > DstBits) {
2205 auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
2206 auto *BackedgeCheck =
2207 Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
2208 ConstantInt::get(Loc->getContext(), MaxVal));
2209 BackedgeCheck = Builder.CreateAnd(
2210 BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
2211
2212 EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
2213 }
2214
2215 return EndCheck;
2216}
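The emitted check has roughly this shape when the step is neither one nor of known sign (illustrative IR only):

  //   %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %abs.step,
  //                                                        i64 %trip.count)
  //   %mul.result = extractvalue { i64, i1 } %mul, 0
  //   %mul.overflow = extractvalue { i64, i1 } %mul, 1
  //   %add = add i64 %start, %mul.result   ; positive-step arm
  //   %sub = sub i64 %start, %mul.result   ; negative-step arm
  //   %lt = icmp ult i64 %add, %start      ; slt when Signed
  //   %gt = icmp ugt i64 %sub, %start      ; sgt when Signed
  //   %end = select i1 %step.neg, i1 %gt, i1 %lt
  //   %wrap = or i1 %end, %mul.overflow    ; true => the AR may wrap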
2217
2218Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
2219 Instruction *IP) {
2220 const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
2221 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2222
2223 // Add a check for NUSW
2224 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2225 NUSWCheck = generateOverflowCheck(A, IP, false);
2226
2227 // Add a check for NSSW
2228 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2229 NSSWCheck = generateOverflowCheck(A, IP, true);
2230
2231 if (NUSWCheck && NSSWCheck)
2232 return Builder.CreateOr(NUSWCheck, NSSWCheck);
2233
2234 if (NUSWCheck)
2235 return NUSWCheck;
2236
2237 if (NSSWCheck)
2238 return NSSWCheck;
2239
2240 return ConstantInt::getFalse(IP->getContext());
2241}
2242
2243Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
2244 Instruction *IP) {
2245 // Loop over all checks in this set.
2246 SmallVector<Value *> Checks;
2247 for (const auto *Pred : Union->getPredicates()) {
2248 Checks.push_back(expandCodeForPredicate(Pred, IP));
2249 Builder.SetInsertPoint(IP);
2250 }
2251
2252 if (Checks.empty())
2253 return ConstantInt::getFalse(IP->getContext());
2254 return Builder.CreateOr(Checks);
2255}
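A sketch of predicate expansion in a hypothetical loop-versioning client (Pred is the union predicate accumulated by PredicatedScalarEvolution; CheckBB branches to the two loop versions):

  Value *AnyFail =
      Expander.expandCodeForPredicate(&Pred, CheckBB->getTerminator());
  // AnyFail is true iff some assumption may be violated at run time; the
  // client branches to the unoptimized loop copy when it is set.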
2256
2257Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
2258 auto *DefI = dyn_cast<Instruction>(V);
2259 if (!PreserveLCSSA || !DefI)
2260 return V;
2261
2262 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
2263 Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent());
2264 Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent());
2265 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
2266 return V;
2267
2268 // Create a temporary instruction at the current insertion point, so we
2269 // can hand it off to the helper to create LCSSA PHIs if required for the
2270 // new use.
2271 // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
2272 // would accept an insertion point and return an LCSSA phi for that
2273 // insertion point, so there is no need to insert & remove the temporary
2274 // instruction.
2275 Type *ToTy;
2276 if (DefI->getType()->isIntegerTy())
2277 ToTy = PointerType::get(DefI->getContext(), 0);
2278 else
2279 ToTy = Type::getInt32Ty(DefI->getContext());
2280 Instruction *User =
2281 CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt);
2282 auto RemoveUserOnExit =
2283 make_scope_exit([User]() { User->eraseFromParent(); });
2284
2285 SmallVector<Instruction *, 1> ToUpdate;
2286 ToUpdate.push_back(DefI);
2287 SmallVector<PHINode *, 16> PHIsToRemove;
2288 SmallVector<PHINode *, 16> InsertedPHIs;
2289 formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, &PHIsToRemove,
2290 &InsertedPHIs);
2291 for (PHINode *PN : InsertedPHIs)
2292 rememberInstruction(PN);
2293 for (PHINode *PN : PHIsToRemove) {
2294 if (!PN->use_empty())
2295 continue;
2296 InsertedValues.erase(PN);
2297 InsertedPostIncValues.erase(PN);
2298 PN->eraseFromParent();
2299 }
2300
2301 return User->getOperand(0);
2302}
2303
2304namespace {
2305// Search for a SCEV subexpression that is not safe to expand. Any expression
2306// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2307// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2308// instruction, but the important thing is that we prove the denominator is
2309// nonzero before expansion.
2310//
2311// IVUsers already checks that IV-derived expressions are safe. So this check is
2312// only needed when the expression includes some subexpression that is not IV
2313// derived.
2314//
2315// Currently, we only allow division by a value provably non-zero here.
2316//
2317// We cannot generally expand recurrences unless the step dominates the loop
2318// header. The expander handles the special case of affine recurrences by
2319// scaling the recurrence outside the loop, but this technique isn't generally
2320// applicable. Expanding a nested recurrence outside a loop requires computing
2321// binomial coefficients. This could be done, but the recurrence has to be in a
2322// perfectly reduced form, which can't be guaranteed.
2323struct SCEVFindUnsafe {
2324 ScalarEvolution &SE;
2325 bool CanonicalMode;
2326 bool IsUnsafe = false;
2327
2328 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2329 : SE(SE), CanonicalMode(CanonicalMode) {}
2330
2331 bool follow(const SCEV *S) {
2332 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
2333 if (!SE.isKnownNonZero(D->getRHS())) {
2334 IsUnsafe = true;
2335 return false;
2336 }
2337 }
2338 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2339 // For non-affine addrecs or in non-canonical mode we need a preheader
2340 // to insert into.
2341 if (!AR->getLoop()->getLoopPreheader() &&
2342 (!CanonicalMode || !AR->isAffine())) {
2343 IsUnsafe = true;
2344 return false;
2345 }
2346 }
2347 return true;
2348 }
2349 bool isDone() const { return IsUnsafe; }
2350};
2351} // namespace
2352
2354 SCEVFindUnsafe Search(SE, CanonicalMode);
2355 visitAll(S, Search);
2356 return !Search.IsUnsafe;
2357}
2358
2360 const Instruction *InsertionPoint) const {
2361 if (!isSafeToExpand(S))
2362 return false;
2363 // We have to prove that the expanded site of S dominates InsertionPoint.
2364 // This is easy when not in the same block, but hard when S is an instruction
2365 // to be expanded somewhere inside the same block as our insertion point.
2366 // What we really need here is something analogous to an OrderedBasicBlock,
2367 // but for the moment, we paper over the problem by handling two common and
2368 // cheap-to-check cases.
2369 if (SE.properlyDominates(S, InsertionPoint->getParent()))
2370 return true;
2371 if (SE.dominates(S, InsertionPoint->getParent())) {
2372 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2373 return true;
2374 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
2375 if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
2376 return true;
2377 }
2378 return false;
2379}
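The usual guard pattern in callers (sketch; names hypothetical):

  if (!Expander.isSafeToExpandAt(S, InsertPt))
    return false; // e.g. S divides by a possibly-zero value
  Value *V = Expander.expandCodeFor(S, S->getType(), InsertPt->getIterator());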
2380
2382 // Result is used, nothing to remove.
2383 if (ResultUsed)
2384 return;
2385
2386 // Restore original poison flags.
2387 for (auto [I, Flags] : Expander.OrigFlags)
2388 Flags.apply(I);
2389
2390 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2391#ifndef NDEBUG
2392 SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
2393 InsertedInstructions.end());
2394 (void)InsertedSet;
2395#endif
2396 // Remove sets with value handles.
2397 Expander.clear();
2398
2399 // Remove all inserted instructions.
2400 for (Instruction *I : reverse(InsertedInstructions)) {
2401#ifndef NDEBUG
2402 assert(all_of(I->users(),
2403 [&InsertedSet](Value *U) {
2404 return InsertedSet.contains(cast<Instruction>(U));
2405 }) &&
2406 "removed instruction should only be used by instructions inserted "
2407 "during expansion");
2408#endif
2409 assert(!I->getType()->isVoidTy() &&
2410 "inserted instruction should have non-void types");
2411 I->replaceAllUsesWith(PoisonValue::get(I->getType()));
2412 I->eraseFromParent();
2413 }
2414}
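SCEVExpanderCleaner is intended to be used RAII-style around a speculative expansion; a minimal sketch (the Profitable query is hypothetical):

  SCEVExpander Expander(SE, DL, "runtime.check");
  SCEVExpanderCleaner Cleaner(Expander);
  Value *Check = Expander.expandCodeForPredicate(&Pred, IP);
  if (!Profitable(Check))
    return; // Cleaner's destructor runs cleanup() and erases the expansion
  Cleaner.markResultUsed();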
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Common GEP
Hexagon Hardware Loops
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static const Loop * PickMostRelevantLoop(const Loop *A, const Loop *B, DominatorTree &DT)
PickMostRelevantLoop - Given two loops pick the one that's most relevant for SCEV expansion.
static InstructionCost costAndCollectOperands(const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, SmallVectorImpl< SCEVOperand > &Worklist)
static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, const SCEVAddRecExpr *Requested, bool &InvertStep)
Check whether we can cheaply express the requested SCEV in terms of the available PHI SCEV by truncat...
#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallSet class.
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:608
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:988
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:787
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:2222
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:873
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
This class represents an Operation in the Expression.
bool isNonIntegralPointerType(PointerType *PT) const
Definition: DataLayout.h:352
A debug info location.
Definition: DebugLoc.h:33
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags noUnsignedWrap()
static GEPNoWrapFlags none()
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Definition: IRBuilder.cpp:89
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2066
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2547
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1048
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2060
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2566
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:2002
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:217
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1744
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2429
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:142
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1367
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2048
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1498
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2189
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1350
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2181
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2383
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
const Instruction * getNextNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the next non-debug instruction in the same basic block as 'this',...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
Class to represent integer types.
Definition: DerivedTypes.h:42
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:439
bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc)
Checks if moving a specific instruction can break LCSSA in any loop.
Definition: LoopInfo.h:465
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
bool isComplete() const
If the PHI node is complete which means all of its parent's predecessors have incoming value in this ...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEV * getOperand() const
This class represents an assumption that the expression LHS Pred RHS evaluates to true,...
const SCEV * getRHS() const
Returns the right hand side of the predicate.
ICmpInst::Predicate getPredicate() const
const SCEV * getLHS() const
Returns the left hand side of the predicate.
This class represents a constant integer value.
Value * generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc, bool Signed)
Generates code that evaluates if the AR expression will overflow.
bool hasRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L)
Determine whether there is an existing expansion of S that can be reused.
SmallVector< Instruction *, 32 > getAllInsertedInstructions() const
Return a vector containing all instructions inserted during expansion.
bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint) const
Return true if the given expression is safe to expand in the sense that all materialized values are d...
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
Value * expandUnionPredicate(const SCEVUnionPredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos, bool RecomputePoisonFlags=false)
Utility for hoisting IncV (with all subexpressions requried for its computation) before InsertPos.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
bool isInsertedInstruction(Instruction *I) const
Return true if the specified instruction was inserted by the code rewriter.
Value * expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc)
Generates a code sequence that evaluates this predicate.
static bool canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi, PHINode *WidePhi, Instruction *OrigInc, Instruction *WideInc)
Return true if both increments directly increment the corresponding IV PHI nodes and have the same op...
Value * expandComparePredicate(const SCEVComparePredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
Value * expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
Instruction * getIVIncOperand(Instruction *IncV, Instruction *InsertPos, bool allowScale)
Return the induction variable increment's IV operand.
BasicBlock::iterator findInsertPointAfter(Instruction *I, Instruction *MustDominate) const
Returns a suitable insert point after I, that dominates MustDominate.
void setInsertPoint(Instruction *IP)
Set the current insertion point.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getOperand(unsigned i) const
ArrayRef< const SCEV * > operands() const
This class represents an assumption made using SCEV expressions which can be checked at run-time.
SCEVPredicateKind getKind() const
This class represents a cast from a pointer to a pointer-sized integer value.
This class represents a signed maximum selection.
This class represents a signed minimum selection.
This class represents a sequential/in-order unsigned minimum selection.
This class represents a sign extension of a small integer value to a larger integer value.
This class represents a truncation of an integer value to a smaller integer value.
This class represents a binary unsigned division operation.
const SCEV * getLHS() const
const SCEV * getRHS() const
This class represents an unsigned maximum selection.
This class represents an unsigned minimum selection.
This class represents a composition of other SCEV predicates, and is the class that most clients will...
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents the value of vscale, as used when defining the length of a scalable vector or r...
This class represents an assumption made on an AddRec expression.
const SCEVAddRecExpr * getExpr() const
Implementation of the SCEVPredicate interface.
IncrementWrapFlags getFlags() const
Returns the set assumed no overflow flags.
This class represents a zero extension of a small integer value to a larger integer value.
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
NoWrapFlags
NoWrapFlags are bitfield indices into SubclassData.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * removePointerBase(const SCEV *S)
Compute an expression equivalent to S - getPointerBase(S).
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTruncateOrNoop(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
bool containsAddRecurrence(const SCEV *S)
Return true if the SCEV is a scAddRecExpr or it contains scAddRecExpr.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
static SCEV::NoWrapFlags clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
std::optional< SCEV::NoWrapFlags > getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO)
Parse NSW/NUW flags from add/sub/mul IR binary operation Op into SCEV no-wrap flags,...
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
bool dominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV dominate the specified basic block.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask)
Convenient NoWrapFlags manipulation that hides enum casts and is visible in the ScalarEvolution name ...
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
bool canReuseInstruction(const SCEV *S, Instruction *I, SmallVectorImpl< Instruction * > &DropPoisonGeneratingInsts)
Check whether it is poison-safe to represent the expression S using the instruction I.
const SCEV * getPredicatedSymbolicMaxBackedgeTakenCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > &Predicates)
Similar to getSymbolicMaxBackedgeTakenCount, except it will add a set of SCEV predicates to Predicate...
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
@ None
The cast is not used with a load/store of any kind.
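Sketch of an expansion-cost query against these hooks, assuming a TargetTransformInfo reference TTI and an integer Type *Ty; the threshold is illustrative:

  InstructionCost MulCost = TTI.getArithmeticInstrCost(
      Instruction::Mul, Ty, TargetTransformInfo::TCK_CodeSize);
  bool Cheap = MulCost.isValid() &&
               MulCost <= TargetTransformInfo::TCC_Basic;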
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
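Twines are lazy and only safe while their operands live; materialize with str() before the temporaries die. A sketch with an assumed unsigned LoopDepth:

  std::string IVName = (Twine("indvar.") + Twine(LoopDepth)).str();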
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
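A hypothetical helper showing how these predicates compose when sizing a type for arithmetic:

  // Width of Ty usable for integer arithmetic; 0 when unsuitable.
  static unsigned arithWidth(Type *Ty) {
    if (Ty->isIntegerTy())
      return Ty->getIntegerBitWidth();
    if (Ty->isVectorTy())
      return Ty->getScalarSizeInBits(); // element width for vectors
    return 0; // pointers etc. need the DataLayout for a width
  }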
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:288
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
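The usual replace-then-erase idiom built on these members; OldI is an assumed Instruction and NewV an equivalent Value of the same type:

  OldI->replaceAllUsesWith(NewV); // afterwards OldI->use_empty() holds
  OldI->eraseFromParent();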
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:619
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:826
specificval_ty m_Specific(const Value *V)
Match only the specific value given.
Definition: PatternMatch.h:885
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:189
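Sketch of matching a conditional branch over an integer compare with these matchers; Term is an assumed terminator value:

  using namespace llvm::PatternMatch;
  CmpPredicate Pred;
  Value *X, *Y;
  BasicBlock *TBB, *FBB;
  if (match(Term, m_Br(m_ICmp(Pred, m_Value(X), m_Value(Y)), TBB, FBB))) {
    // Term branches on an integer compare of X and Y.
  }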
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void visitAll(const SCEV *Root, SV &Visitor)
Use SCEVTraversal to visit all nodes in the given expression tree.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
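A common cleanup pattern: restore a saved IRBuilder insertion point on every exit path. Builder and the surrounding scope are assumed:

  auto SavedIP = Builder.saveIP();
  auto Restore = make_scope_exit([&] { Builder.restoreIP(SavedIP); });
  // ... emit code that may move the insertion point or return early ...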
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), where A is the 0-based index of the item in the sequence and B, C, ... are the corresponding values from the input ranges.
Definition: STLExtras.h:2448
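Sketch combining enumerate with drop_begin to walk all operands after the first, with their positions:

  void dumpTail(ArrayRef<const SCEV *> Ops) {
    for (auto [Idx, S] : enumerate(drop_begin(Ops)))
      dbgs() << "operand " << Idx + 1 << ": " << *S << "\n";
  }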
auto pred_end(const MachineBasicBlock *BB)
auto pred_size(const MachineBasicBlock *BB)
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
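Minimal sketch of folding an instruction with InstSimplify; DL is the assumed module DataLayout:

  if (Value *Simpler = simplifyInstruction(I, SimplifyQuery(DL)))
    I->replaceAllUsesWith(Simpler); // I is now dead and may be erased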
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
@ Mul
Product of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of the innermost containing loop.
Definition: LCSSA.cpp:325
auto pred_begin(const MachineBasicBlock *BB)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
InstructionCost Cost
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on dominating conditions.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
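Sketch: test whether an expression mentions any opaque IR value (a SCEVUnknown node):

  bool HasUnknown =
      SCEVExprContains(S, [](const SCEV *N) { return isa<SCEVUnknown>(N); });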
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
void apply(Instruction *I)
PoisonFlags(const Instruction *I)
struct for holding enough information to help calculate the cost of the given SCEV when expanded into IR.