1//===- ValueTracking.cpp - Walk computations to compute properties --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains routines that help analyze properties that chains of
10// computations have.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/ValueTracking.h"
15#include "llvm/ADT/APFloat.h"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/ScopeExit.h"
21#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/StringRef.h"
32#include "llvm/Analysis/Loads.h"
38#include "llvm/IR/Argument.h"
39#include "llvm/IR/Attributes.h"
40#include "llvm/IR/BasicBlock.h"
41#include "llvm/IR/Constant.h"
43#include "llvm/IR/Constants.h"
46#include "llvm/IR/Dominators.h"
48#include "llvm/IR/Function.h"
50#include "llvm/IR/GlobalAlias.h"
51#include "llvm/IR/GlobalValue.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/IntrinsicsAArch64.h"
59#include "llvm/IR/IntrinsicsAMDGPU.h"
60#include "llvm/IR/IntrinsicsRISCV.h"
61#include "llvm/IR/IntrinsicsX86.h"
62#include "llvm/IR/LLVMContext.h"
63#include "llvm/IR/Metadata.h"
64#include "llvm/IR/Module.h"
65#include "llvm/IR/Operator.h"
67#include "llvm/IR/Type.h"
68#include "llvm/IR/User.h"
69#include "llvm/IR/Value.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82
83using namespace llvm;
84using namespace llvm::PatternMatch;
85
86// Controls the number of uses of the value searched for possible
87// dominating comparisons.
88static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
89 cl::Hidden, cl::init(20));
90
91
92/// Returns the bitwidth of the given scalar or pointer type. For vector types,
93/// returns the element type's bitwidth.
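/// For example, both i32 and <4 x i32> yield 32, while a pointer type falls
/// back to the DataLayout pointer size (e.g. 64 on most 64-bit targets).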
94static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
95 if (unsigned BitWidth = Ty->getScalarSizeInBits())
96 return BitWidth;
97
98 return DL.getPointerTypeSizeInBits(Ty);
99}
100
101// Given the provided Value and, potentially, a context instruction, return
102// the preferred context instruction (if any).
103static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
104 // If we've been provided with a context instruction, then use that (provided
105 // it has been inserted).
106 if (CxtI && CxtI->getParent())
107 return CxtI;
108
109 // If the value is really an already-inserted instruction, then use that.
110 CxtI = dyn_cast<Instruction>(V);
111 if (CxtI && CxtI->getParent())
112 return CxtI;
113
114 return nullptr;
115}
116
117static const Instruction *safeCxtI(const Value *V1, const Value *V2, const Instruction *CxtI) {
118 // If we've been provided with a context instruction, then use that (provided
119 // it has been inserted).
120 if (CxtI && CxtI->getParent())
121 return CxtI;
122
123 // If the value is really an already-inserted instruction, then use that.
124 CxtI = dyn_cast<Instruction>(V1);
125 if (CxtI && CxtI->getParent())
126 return CxtI;
127
128 CxtI = dyn_cast<Instruction>(V2);
129 if (CxtI && CxtI->getParent())
130 return CxtI;
131
132 return nullptr;
133}
134
135bool llvm::getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
136 const APInt &DemandedElts,
137 APInt &DemandedLHS, APInt &DemandedRHS) {
138 if (isa<ScalableVectorType>(Shuf->getType())) {
139 assert(DemandedElts == APInt(1,1));
140 DemandedLHS = DemandedRHS = DemandedElts;
141 return true;
142 }
143
144 int NumElts =
145 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
146 return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
147 DemandedElts, DemandedLHS, DemandedRHS);
148}
149
150static void computeKnownBits(const Value *V, const APInt &DemandedElts,
151 KnownBits &Known, unsigned Depth,
152 const SimplifyQuery &Q);
153
154void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
155 const SimplifyQuery &Q) {
156 // Since the number of lanes in a scalable vector is unknown at compile time,
157 // we track one bit which is implicitly broadcast to all lanes. This means
158 // that all lanes in a scalable vector are considered demanded.
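 // For example, a fixed <4 x i32> gets DemandedElts = APInt(4, 0b1111), while
 // a scalable <vscale x 4 x i32> is tracked with the single bit APInt(1, 1).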
159 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
160 APInt DemandedElts =
161 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
162 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
163}
164
165void llvm::computeKnownBits(const Value *V, KnownBits &Known,
166 const DataLayout &DL, unsigned Depth,
167 AssumptionCache *AC, const Instruction *CxtI,
168 const DominatorTree *DT, bool UseInstrInfo) {
169 computeKnownBits(
170 V, Known, Depth,
171 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
172}
173
174KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
175 unsigned Depth, AssumptionCache *AC,
176 const Instruction *CxtI,
177 const DominatorTree *DT, bool UseInstrInfo) {
178 return computeKnownBits(
179 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
180}
181
182KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
183 const DataLayout &DL, unsigned Depth,
184 AssumptionCache *AC, const Instruction *CxtI,
185 const DominatorTree *DT, bool UseInstrInfo) {
186 return computeKnownBits(
187 V, DemandedElts, Depth,
188 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
189}
190
191static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
192 const SimplifyQuery &SQ) {
193 // Look for an inverted mask: (X & ~M) op (Y & M).
194 {
195 Value *M;
196 if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
197 match(RHS, m_c_And(m_Specific(M), m_Value())) &&
198 isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
199 return true;
200 }
201
202 // X op (Y & ~X)
203 if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
204 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
205 return true;
206
207 // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
208 // for constant Y.
209 Value *Y;
210 if (match(RHS,
211 m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
212 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
213 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
214 return true;
215
216 // Peek through extends to find a 'not' of the other side:
217 // (ext Y) op ext(~Y)
218 if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
219 match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
220 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
221 return true;
222
223 // Look for: (A & B) op ~(A | B)
224 {
225 Value *A, *B;
226 if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
227 match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
228 isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
229 isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
230 return true;
231 }
232
233 return false;
234}
235
236bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
237 const WithCache<const Value *> &RHSCache,
238 const SimplifyQuery &SQ) {
239 const Value *LHS = LHSCache.getValue();
240 const Value *RHS = RHSCache.getValue();
241
242 assert(LHS->getType() == RHS->getType() &&
243 "LHS and RHS should have the same type");
244 assert(LHS->getType()->isIntOrIntVectorTy() &&
245 "LHS and RHS should be integers");
246
247 if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
248 haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
249 return true;
250
251 return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
252 RHSCache.getKnownBits(SQ));
253}
254
255bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
256 return !I->user_empty() && all_of(I->users(), [](const User *U) {
257 return match(U, m_ICmp(m_Value(), m_Zero()));
258 });
259}
260
261bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
262 return !I->user_empty() && all_of(I->users(), [](const User *U) {
263 ICmpInst::Predicate P;
264 return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
265 });
266}
267
268static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
269 const SimplifyQuery &Q);
270
271bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
272 bool OrZero, unsigned Depth,
273 AssumptionCache *AC, const Instruction *CxtI,
274 const DominatorTree *DT, bool UseInstrInfo) {
275 return ::isKnownToBeAPowerOfTwo(
276 V, OrZero, Depth,
277 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
278}
279
280static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
281 const SimplifyQuery &Q, unsigned Depth);
282
283bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
284 unsigned Depth) {
285 return computeKnownBits(V, Depth, SQ).isNonNegative();
286}
287
288bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
289 unsigned Depth) {
290 if (auto *CI = dyn_cast<ConstantInt>(V))
291 return CI->getValue().isStrictlyPositive();
292
293 // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
294 // this updated.
295 KnownBits Known = computeKnownBits(V, Depth, SQ);
296 return Known.isNonNegative() &&
297 (Known.isNonZero() || isKnownNonZero(V, SQ, Depth));
298}
299
300bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
301 unsigned Depth) {
302 return computeKnownBits(V, Depth, SQ).isNegative();
303}
304
305static bool isKnownNonEqual(const Value *V1, const Value *V2,
306 const APInt &DemandedElts, unsigned Depth,
307 const SimplifyQuery &Q);
308
309bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
310 const DataLayout &DL, AssumptionCache *AC,
311 const Instruction *CxtI, const DominatorTree *DT,
312 bool UseInstrInfo) {
313 assert(V1->getType() == V2->getType() &&
314 "Testing equality of non-equal types!");
315 auto *FVTy = dyn_cast<FixedVectorType>(V1->getType());
316 APInt DemandedElts =
317 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
318 return ::isKnownNonEqual(
319 V1, V2, DemandedElts, 0,
320 SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo));
321}
322
323bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
324 const SimplifyQuery &SQ, unsigned Depth) {
325 KnownBits Known(Mask.getBitWidth());
326 computeKnownBits(V, Known, Depth, SQ);
327 return Mask.isSubsetOf(Known.Zero);
328}
329
330static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
331 unsigned Depth, const SimplifyQuery &Q);
332
333static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
334 const SimplifyQuery &Q) {
335 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
336 APInt DemandedElts =
337 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
338 return ComputeNumSignBits(V, DemandedElts, Depth, Q);
339}
340
341unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
342 unsigned Depth, AssumptionCache *AC,
343 const Instruction *CxtI,
344 const DominatorTree *DT, bool UseInstrInfo) {
345 return ::ComputeNumSignBits(
346 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
347}
348
349unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
350 unsigned Depth, AssumptionCache *AC,
351 const Instruction *CxtI,
352 const DominatorTree *DT) {
353 unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
354 return V->getType()->getScalarSizeInBits() - SignBits + 1;
355}
356
357static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
358 bool NSW, bool NUW,
359 const APInt &DemandedElts,
360 KnownBits &KnownOut, KnownBits &Known2,
361 unsigned Depth, const SimplifyQuery &Q) {
362 computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);
363
364 // If one operand is unknown and we have no nowrap information,
365 // the result will be unknown independently of the second operand.
366 if (KnownOut.isUnknown() && !NSW && !NUW)
367 return;
368
369 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
370 KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, Known2, KnownOut);
371}
372
373static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
374 const APInt &DemandedElts, KnownBits &Known,
375 KnownBits &Known2, unsigned Depth,
376 const SimplifyQuery &Q) {
377 computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
378 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
379
380 bool isKnownNegative = false;
381 bool isKnownNonNegative = false;
382 // If the multiplication is known not to overflow, compute the sign bit.
383 if (NSW) {
384 if (Op0 == Op1) {
385 // The product of a number with itself is non-negative.
386 isKnownNonNegative = true;
387 } else {
388 bool isKnownNonNegativeOp1 = Known.isNonNegative();
389 bool isKnownNonNegativeOp0 = Known2.isNonNegative();
390 bool isKnownNegativeOp1 = Known.isNegative();
391 bool isKnownNegativeOp0 = Known2.isNegative();
392 // The product of two numbers with the same sign is non-negative.
393 isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
394 (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
395 // The product of a negative number and a non-negative number is either
396 // negative or zero.
397 if (!isKnownNonNegative)
398 isKnownNegative =
399 (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
400 Known2.isNonZero()) ||
401 (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
402 }
403 }
404
405 bool SelfMultiply = Op0 == Op1;
406 if (SelfMultiply)
407 SelfMultiply &=
408 isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
409 Known = KnownBits::mul(Known, Known2, SelfMultiply);
410
411 // Only make use of no-wrap flags if we failed to compute the sign bit
412 // directly. This matters if the multiplication always overflows, in
413 // which case we prefer to follow the result of the direct computation,
414 // though as the program is invoking undefined behaviour we can choose
415 // whatever we like here.
416 if (isKnownNonNegative && !Known.isNegative())
417 Known.makeNonNegative();
418 else if (isKnownNegative && !Known.isNonNegative())
419 Known.makeNegative();
420}
421
422void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
423 KnownBits &Known) {
424 unsigned BitWidth = Known.getBitWidth();
425 unsigned NumRanges = Ranges.getNumOperands() / 2;
426 assert(NumRanges >= 1);
427
428 Known.Zero.setAllBits();
429 Known.One.setAllBits();
430
431 for (unsigned i = 0; i < NumRanges; ++i) {
432 ConstantInt *Lower =
433 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
434 ConstantInt *Upper =
435 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
436 ConstantRange Range(Lower->getValue(), Upper->getValue());
437
438 // The first CommonPrefixBits of all values in Range are equal.
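 // For example, for the i8 range [32, 48) the minimum is 0b00100000 and the
 // maximum is 0b00101111; min ^ max = 0b00001111, so the top four bits are
 // shared by every value in the range and can be recorded as known.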
439 unsigned CommonPrefixBits =
440 (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
441 APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
442 APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
443 Known.One &= UnsignedMax & Mask;
444 Known.Zero &= ~UnsignedMax & Mask;
445 }
446}
447
448static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
449 SmallVector<const Value *, 16> WorkSet(I->op_begin(), I->op_end());
450 SmallPtrSet<const Value *, 32> Visited;
451 SmallPtrSet<const Value *, 16> EphValues;
452
453 // The instruction defining an assumption's condition itself is always
454 // considered ephemeral to that assumption (even if it has other
455 // non-ephemeral users). See r246696's test case for an example.
456 if (is_contained(I->operands(), E))
457 return true;
458
459 while (!WorkSet.empty()) {
460 const Value *V = WorkSet.pop_back_val();
461 if (!Visited.insert(V).second)
462 continue;
463
464 // If all uses of this value are ephemeral, then so is this value.
465 if (llvm::all_of(V->users(), [&](const User *U) {
466 return EphValues.count(U);
467 })) {
468 if (V == E)
469 return true;
470
471 if (V == I || (isa<Instruction>(V) &&
472 !cast<Instruction>(V)->mayHaveSideEffects() &&
473 !cast<Instruction>(V)->isTerminator())) {
474 EphValues.insert(V);
475 if (const User *U = dyn_cast<User>(V))
476 append_range(WorkSet, U->operands());
477 }
478 }
479 }
480
481 return false;
482}
483
484// Is this an intrinsic that cannot be speculated but also cannot trap?
485bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
486 if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
487 return CI->isAssumeLikeIntrinsic();
488
489 return false;
490}
491
492bool llvm::isValidAssumeForContext(const Instruction *Inv,
493 const Instruction *CxtI,
494 const DominatorTree *DT,
495 bool AllowEphemerals) {
496 // There are two restrictions on the use of an assume:
497 // 1. The assume must dominate the context (or the control flow must
498 // reach the assume whenever it reaches the context).
499 // 2. The context must not be in the assume's set of ephemeral values
500 // (otherwise we will use the assume to prove that the condition
501 // feeding the assume is trivially true, thus causing the removal of
502 // the assume).
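// For example, given
//   %c = icmp ugt i32 %x, 10
//   call void @llvm.assume(i1 %c)
// %c is ephemeral to the assume: using the assume at %c itself would let the
// condition be "proved" from itself and then folded away.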
503
504 if (Inv->getParent() == CxtI->getParent()) {
505 // If Inv and CtxI are in the same block, check if the assume (Inv) is first
506 // in the BB.
507 if (Inv->comesBefore(CxtI))
508 return true;
509
510 // Don't let an assume affect itself - this would cause the problems
511 // `isEphemeralValueOf` is trying to prevent, and it would also make
512 // the loop below go out of bounds.
513 if (!AllowEphemerals && Inv == CxtI)
514 return false;
515
516 // The context comes first, but they're both in the same block.
517 // Make sure there is nothing in between that might interrupt
518 // the control flow, not even CxtI itself.
519 // We limit the scan distance between the assume and its context instruction
520 // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
521 // it can be adjusted if needed (could be turned into a cl::opt).
522 auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
523 if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
524 return false;
525
526 return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI);
527 }
528
529 // Inv and CxtI are in different blocks.
530 if (DT) {
531 if (DT->dominates(Inv, CxtI))
532 return true;
533 } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
534 // We don't have a DT, but this trivially dominates.
535 return true;
536 }
537
538 return false;
539}
540
541// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
542// we still have enough information about `RHS` to conclude non-zero. For
543// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
544// so the extra compile time may not be worth it, but possibly a second API
545// should be created for use outside of loops.
546static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
547 // v u> y implies v != 0.
548 if (Pred == ICmpInst::ICMP_UGT)
549 return true;
550
551 // Special-case v != 0 to also handle v != null.
552 if (Pred == ICmpInst::ICMP_NE)
553 return match(RHS, m_Zero());
554
555 // All other predicates - rely on generic ConstantRange handling.
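 // For example, with Pred == ICMP_UGE and RHS == 1 the exact region is
 // [1, UINT_MAX], which excludes zero, so the compared value must be non-zero;
 // with Pred == ICMP_ULT and RHS == 5 the region [0, 5) still contains zero.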
556 const APInt *C;
557 APInt Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
558 if (match(RHS, m_APInt(C))) {
559 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
560 return !TrueValues.contains(Zero);
561 }
562
563 auto *VC = dyn_cast<ConstantDataVector>(RHS);
564 if (VC == nullptr)
565 return false;
566
567 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
568 ++ElemIdx) {
569 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
570 Pred, VC->getElementAsAPInt(ElemIdx));
571 if (TrueValues.contains(Zero))
572 return false;
573 }
574 return true;
575}
576
577static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
578 // Use of assumptions is context-sensitive. If we don't have a context, we
579 // cannot use them!
580 if (!Q.AC || !Q.CxtI)
581 return false;
582
583 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
584 if (!Elem.Assume)
585 continue;
586
587 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
588 assert(I->getFunction() == Q.CxtI->getFunction() &&
589 "Got assumption for the wrong function!");
590
591 if (Elem.Index != AssumptionCache::ExprResultIdx) {
592 if (!V->getType()->isPointerTy())
593 continue;
594 if (RetainedKnowledge RK = getKnowledgeFromBundle(
595 *I, I->bundle_op_info_begin()[Elem.Index])) {
596 if (RK.WasOn == V &&
597 (RK.AttrKind == Attribute::NonNull ||
598 (RK.AttrKind == Attribute::Dereferenceable &&
599 !NullPointerIsDefined(Q.CxtI->getFunction(),
600 V->getType()->getPointerAddressSpace()))) &&
601 isValidAssumeForContext(I, Q.CxtI, Q.DT))
602 return true;
603 }
604 continue;
605 }
606
607 // Warning: This loop can end up being somewhat performance sensitive.
608 // We're running this loop once for each value queried, resulting in a
609 // runtime of ~O(#assumes * #values).
610
611 Value *RHS;
612 CmpInst::Predicate Pred;
613 auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
614 if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
615 return false;
616
617 if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
618 return true;
619 }
620
621 return false;
622}
623
624static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
625 Value *LHS, Value *RHS, KnownBits &Known,
626 const SimplifyQuery &Q) {
627 if (RHS->getType()->isPointerTy()) {
628 // Handle comparison of pointer to null explicitly, as it will not be
629 // covered by the m_APInt() logic below.
630 if (LHS == V && match(RHS, m_Zero())) {
631 switch (Pred) {
632 case ICmpInst::ICMP_EQ:
633 Known.setAllZero();
634 break;
635 case ICmpInst::ICMP_SGE:
636 case ICmpInst::ICMP_SGT:
637 Known.makeNonNegative();
638 break;
639 case ICmpInst::ICMP_SLT:
640 Known.makeNegative();
641 break;
642 default:
643 break;
644 }
645 }
646 return;
647 }
648
649 unsigned BitWidth = Known.getBitWidth();
650 auto m_V =
651 m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));
652
653 Value *Y;
654 const APInt *Mask, *C;
655 uint64_t ShAmt;
656 switch (Pred) {
657 case ICmpInst::ICMP_EQ:
658 // assume(V = C)
659 if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
660 Known = Known.unionWith(KnownBits::makeConstant(*C));
661 // assume(V & Mask = C)
662 } else if (match(LHS, m_c_And(m_V, m_Value(Y))) &&
663 match(RHS, m_APInt(C))) {
664 // For one bits in Mask, we can propagate bits from C to V.
665 Known.One |= *C;
666 if (match(Y, m_APInt(Mask)))
667 Known.Zero |= ~*C & *Mask;
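 // For example, assume((V & 0xF0) == 0x30) makes bits 5..4 known one and
 // bits 7..6 known zero: Known.One |= 0x30 and Known.Zero |= 0xC0.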
668 // assume(V | Mask = C)
669 } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) {
670 // For zero bits in Mask, we can propagate bits from C to V.
671 Known.Zero |= ~*C;
672 if (match(Y, m_APInt(Mask)))
673 Known.One |= *C & ~*Mask;
674 // assume(V ^ Mask = C)
675 } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
676 match(RHS, m_APInt(C))) {
677 // Equivalent to assume(V == Mask ^ C)
678 Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
679 // assume(V << ShAmt = C)
680 } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
681 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
682 // For those bits in C that are known, we can propagate them to known
683 // bits in V shifted to the right by ShAmt.
684 KnownBits RHSKnown = KnownBits::makeConstant(*C);
685 RHSKnown.Zero.lshrInPlace(ShAmt);
686 RHSKnown.One.lshrInPlace(ShAmt);
687 Known = Known.unionWith(RHSKnown);
688 // assume(V >> ShAmt = C)
689 } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
690 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
691 KnownBits RHSKnown = KnownBits::makeConstant(*C);
692 // For those bits in RHS that are known, we can propagate them to known
693 // bits in V shifted to the right by C.
694 Known.Zero |= RHSKnown.Zero << ShAmt;
695 Known.One |= RHSKnown.One << ShAmt;
696 }
697 break;
698 case ICmpInst::ICMP_NE: {
699 // assume (V & B != 0) where B is a power of 2
700 const APInt *BPow2;
701 if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
702 Known.One |= *BPow2;
703 break;
704 }
705 default:
706 if (match(RHS, m_APInt(C))) {
707 const APInt *Offset = nullptr;
708 if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
709 ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
710 if (Offset)
711 LHSRange = LHSRange.sub(*Offset);
712 Known = Known.unionWith(LHSRange.toKnownBits());
713 }
714 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
715 // X & Y u> C -> X u> C && Y u> C
716 // X nuw- Y u> C -> X u> C
717 if (match(LHS, m_c_And(m_V, m_Value())) ||
718 match(LHS, m_NUWSub(m_V, m_Value())))
719 Known.One.setHighBits(
720 (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
721 }
722 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
723 // X | Y u< C -> X u< C && Y u< C
724 // X nuw+ Y u< C -> X u< C && Y u< C
725 if (match(LHS, m_c_Or(m_V, m_Value())) ||
726 match(LHS, m_c_NUWAdd(m_V, m_Value()))) {
727 Known.Zero.setHighBits(
728 (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
729 }
730 }
731 }
732 break;
733 }
734}
735
736static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
737 KnownBits &Known,
738 const SimplifyQuery &SQ, bool Invert) {
739 ICmpInst::Predicate Pred =
740 Invert ? Cmp->getInversePredicate() : Cmp->getPredicate();
741 Value *LHS = Cmp->getOperand(0);
742 Value *RHS = Cmp->getOperand(1);
743
744 // Handle icmp pred (trunc V), C
745 if (match(LHS, m_Trunc(m_Specific(V)))) {
746 KnownBits DstKnown(LHS->getType()->getScalarSizeInBits());
747 computeKnownBitsFromCmp(LHS, Pred, LHS, RHS, DstKnown, SQ);
748 Known = Known.unionWith(DstKnown.anyext(Known.getBitWidth()));
749 return;
750 }
751
752 computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, SQ);
753}
754
755static void computeKnownBitsFromCond(const Value *V, Value *Cond,
756 KnownBits &Known, unsigned Depth,
757 const SimplifyQuery &SQ, bool Invert) {
758 Value *A, *B;
759 if (Depth < MaxAnalysisRecursionDepth &&
760 match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
761 KnownBits Known2(Known.getBitWidth());
762 KnownBits Known3(Known.getBitWidth());
763 computeKnownBitsFromCond(V, A, Known2, Depth + 1, SQ, Invert);
764 computeKnownBitsFromCond(V, B, Known3, Depth + 1, SQ, Invert);
765 if (Invert ? match(Cond, m_LogicalOr(m_Value(), m_Value()))
766 : match(Cond, m_LogicalAnd(m_Value(), m_Value())))
767 Known2 = Known2.unionWith(Known3);
768 else
769 Known2 = Known2.intersectWith(Known3);
770 Known = Known.unionWith(Known2);
771 }
772
773 if (auto *Cmp = dyn_cast<ICmpInst>(Cond))
774 computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
775}
776
777void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
778 unsigned Depth, const SimplifyQuery &Q) {
779 // Handle injected condition.
780 if (Q.CC && Q.CC->AffectedValues.contains(V))
781 computeKnownBitsFromCond(V, Q.CC->Cond, Known, Depth, Q, Q.CC->Invert);
782
783 if (!Q.CxtI)
784 return;
785
786 if (Q.DC && Q.DT) {
787 // Handle dominating conditions.
788 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
789 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
790 if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
791 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
792 /*Invert*/ false);
793
794 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
795 if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
796 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
797 /*Invert*/ true);
798 }
799
800 if (Known.hasConflict())
801 Known.resetAll();
802 }
803
804 if (!Q.AC)
805 return;
806
807 unsigned BitWidth = Known.getBitWidth();
808
809 // Note that the patterns below need to be kept in sync with the code
810 // in AssumptionCache::updateAffectedValues.
811
812 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
813 if (!Elem.Assume)
814 continue;
815
816 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
817 assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
818 "Got assumption for the wrong function!");
819
820 if (Elem.Index != AssumptionCache::ExprResultIdx) {
821 if (!V->getType()->isPointerTy())
822 continue;
823 if (RetainedKnowledge RK = getKnowledgeFromBundle(
824 *I, I->bundle_op_info_begin()[Elem.Index])) {
825 if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
826 isPowerOf2_64(RK.ArgValue) &&
827 isValidAssumeForContext(I, Q.CxtI, Q.DT))
828 Known.Zero.setLowBits(Log2_64(RK.ArgValue));
829 }
830 continue;
831 }
832
833 // Warning: This loop can end up being somewhat performance sensitive.
834 // We're running this loop once for each value queried, resulting in a
835 // runtime of ~O(#assumes * #values).
836
837 Value *Arg = I->getArgOperand(0);
838
839 if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
840 assert(BitWidth == 1 && "assume operand is not i1?");
841 (void)BitWidth;
842 Known.setAllOnes();
843 return;
844 }
845 if (match(Arg, m_Not(m_Specific(V))) &&
846 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
847 assert(BitWidth == 1 && "assume operand is not i1?");
848 (void)BitWidth;
849 Known.setAllZero();
850 return;
851 }
852
853 // The remaining tests are all recursive, so bail out if we hit the limit.
854 if (Depth == MaxAnalysisRecursionDepth)
855 continue;
856
857 ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
858 if (!Cmp)
859 continue;
860
861 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
862 continue;
863
864 computeKnownBitsFromICmpCond(V, Cmp, Known, Q, /*Invert=*/false);
865 }
866
867 // Conflicting assumption: Undefined behavior will occur on this execution
868 // path.
869 if (Known.hasConflict())
870 Known.resetAll();
871}
872
873/// Compute known bits from a shift operator, including those with a
874/// non-constant shift amount. Known is the output of this function. Known2 is a
875/// pre-allocated temporary with the same bit width as Known and on return
876/// contains the known bits of the shift value source. KF is an
877/// operator-specific function that, given the known-bits and a shift amount,
878/// computes the implied known-bits of the shift operator's result
879/// for that shift amount. The results from calling KF are conservatively
880/// combined for all permitted shift amounts.
881static void computeKnownBitsFromShiftOperator(
882 const Operator *I, const APInt &DemandedElts, KnownBits &Known,
883 KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
884 function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
885 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
886 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
887 // To limit compile-time impact, only query isKnownNonZero() if we know at
888 // least something about the shift amount.
889 bool ShAmtNonZero =
890 Known.isNonZero() ||
891 (Known.getMaxValue().ult(Known.getBitWidth()) &&
892 isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1));
893 Known = KF(Known2, Known, ShAmtNonZero);
894}
895
896static KnownBits
897getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
898 const KnownBits &KnownLHS, const KnownBits &KnownRHS,
899 unsigned Depth, const SimplifyQuery &Q) {
900 unsigned BitWidth = KnownLHS.getBitWidth();
901 KnownBits KnownOut(BitWidth);
902 bool IsAnd = false;
903 bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
904 Value *X = nullptr, *Y = nullptr;
905
906 switch (I->getOpcode()) {
907 case Instruction::And:
908 KnownOut = KnownLHS & KnownRHS;
909 IsAnd = true;
910 // and(x, -x) is a common idiom that will clear all but the lowest set
911 // bit. If we have a single known bit in x, we can clear all bits
912 // above it.
913 // TODO: instcombine often reassociates independent `and` which can hide
914 // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
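 // For example, if bit 3 of x is known one, the lowest set bit of x is at
 // position 3 or below, so x & -x is at most 0b1000 and all bits above
 // bit 3 are known zero.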
915 if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
916 // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
917 if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
918 KnownOut = KnownLHS.blsi();
919 else
920 KnownOut = KnownRHS.blsi();
921 }
922 break;
923 case Instruction::Or:
924 KnownOut = KnownLHS | KnownRHS;
925 break;
926 case Instruction::Xor:
927 KnownOut = KnownLHS ^ KnownRHS;
928 // xor(x, x-1) is a common idiom that yields a mask of all bits up to
929 // and including the lowest set bit of x. If we have a single known one
930 // bit in x, we can clear all bits above it.
931 // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
932 // -1 but for the purpose of demanded bits (xor(x, x-C) &
933 // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
934 // to use arbitrary C if xor(x, x-C) is the same as xor(x, x-1).
935 if (HasKnownOne &&
936 match(I, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())))) {
937 const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
938 KnownOut = XBits.blsmsk();
939 }
940 break;
941 default:
942 llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
943 }
944
945 // and(x, add (x, -1)) is a common idiom that always clears the low bit;
946 // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
947 // here we handle the more general case of adding any odd number by
948 // matching the form and/xor/or(x, add(x, y)) where y is odd.
949 // TODO: This could be generalized to clearing any bit set in y where the
950 // following bit is known to be unset in y.
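 // For example, with y odd, x and x + y always differ in bit 0, so
 // and(x, x + y) has bit 0 known zero while or/xor(x, x + y) have bit 0
 // known one.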
951 if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
952 (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
953 match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
954 match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
955 KnownBits KnownY(BitWidth);
956 computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
957 if (KnownY.countMinTrailingOnes() > 0) {
958 if (IsAnd)
959 KnownOut.Zero.setBit(0);
960 else
961 KnownOut.One.setBit(0);
962 }
963 }
964 return KnownOut;
965}
966
967static KnownBits computeKnownBitsForHorizontalOperation(
968 const Operator *I, const APInt &DemandedElts, unsigned Depth,
969 const SimplifyQuery &Q,
970 const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
971 KnownBitsFunc) {
972 APInt DemandedEltsLHS, DemandedEltsRHS;
974 DemandedElts, DemandedEltsLHS,
975 DemandedEltsRHS);
976
977 const auto ComputeForSingleOpFunc =
978 [Depth, &Q, KnownBitsFunc](const Value *Op, APInt &DemandedEltsOp) {
979 return KnownBitsFunc(
980 computeKnownBits(Op, DemandedEltsOp, Depth + 1, Q),
981 computeKnownBits(Op, DemandedEltsOp << 1, Depth + 1, Q));
982 };
983
984 if (DemandedEltsRHS.isZero())
985 return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS);
986 if (DemandedEltsLHS.isZero())
987 return ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS);
988
989 return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS)
990 .intersectWith(ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS));
991}
992
993// Public so this can be used in `SimplifyDemandedUseBits`.
994KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
995 const KnownBits &KnownLHS,
996 const KnownBits &KnownRHS,
997 unsigned Depth,
998 const SimplifyQuery &SQ) {
999 auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
1000 APInt DemandedElts =
1001 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
1002
1003 return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
1004 SQ);
1005}
1006
1007ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
1008 Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
1009 // Without vscale_range, we only know that vscale is non-zero.
1010 if (!Attr.isValid())
1011 return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));
1012
1013 unsigned AttrMin = Attr.getVScaleRangeMin();
1014 // Minimum is larger than vscale width, result is always poison.
1015 if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
1016 return ConstantRange::getEmpty(BitWidth);
1017
1018 APInt Min(BitWidth, AttrMin);
1019 std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
1020 if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
1021 return ConstantRange(Min, APInt::getZero(BitWidth));
1022
1023 return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
1024}
1025
1026static void adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond,
1027 Value *Arm, bool Invert, unsigned Depth,
1028 const SimplifyQuery &Q) {
1029 // If we have a constant arm, we are done.
1030 if (Known.isConstant())
1031 return;
1032
1033 // See what condition implies about the bits of the select arm.
1034 KnownBits CondRes(Known.getBitWidth());
1035 computeKnownBitsFromCond(Arm, Cond, CondRes, Depth + 1, Q, Invert);
1036 // If we don't get any information from the condition, no reason to
1037 // proceed.
1038 if (CondRes.isUnknown())
1039 return;
1040
1041 // We can have a conflict if the condition is dead, i.e. if we have
1042 // (x | 64) < 32 ? (x | 64) : y
1043 // we will have a conflict at bit 6 from the condition/the `or`.
1044 // In that case just return. It's not particularly important
1045 // what we do, as this select is going to be simplified soon.
1046 CondRes = CondRes.unionWith(Known);
1047 if (CondRes.hasConflict())
1048 return;
1049
1050 // Finally make sure the information we found is valid. This is relatively
1051 // expensive so it's left for the very end.
1052 if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1))
1053 return;
1054
1055 // Finally, we know we get information from the condition and it's valid,
1056 // so return it.
1057 Known = CondRes;
1058}
1059
1060static void computeKnownBitsFromOperator(const Operator *I,
1061 const APInt &DemandedElts,
1062 KnownBits &Known, unsigned Depth,
1063 const SimplifyQuery &Q) {
1064 unsigned BitWidth = Known.getBitWidth();
1065
1066 KnownBits Known2(BitWidth);
1067 switch (I->getOpcode()) {
1068 default: break;
1069 case Instruction::Load:
1070 if (MDNode *MD =
1071 Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
1072 computeKnownBitsFromRangeMetadata(*MD, Known);
1073 break;
1074 case Instruction::And:
1075 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1076 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1077
1078 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1079 break;
1080 case Instruction::Or:
1081 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1082 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1083
1084 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1085 break;
1086 case Instruction::Xor:
1087 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1088 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1089
1090 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1091 break;
1092 case Instruction::Mul: {
1093 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1094 computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts,
1095 Known, Known2, Depth, Q);
1096 break;
1097 }
1098 case Instruction::UDiv: {
1099 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1100 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1101 Known =
1102 KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1103 break;
1104 }
1105 case Instruction::SDiv: {
1106 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1107 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1108 Known =
1109 KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1110 break;
1111 }
1112 case Instruction::Select: {
1113 auto ComputeForArm = [&](Value *Arm, bool Invert) {
1114 KnownBits Res(Known.getBitWidth());
1115 computeKnownBits(Arm, DemandedElts, Res, Depth + 1, Q);
1116 adjustKnownBitsForSelectArm(Res, I->getOperand(0), Arm, Invert, Depth, Q);
1117 return Res;
1118 };
1119 // Only known if known in both the LHS and RHS.
1120 Known =
1121 ComputeForArm(I->getOperand(1), /*Invert=*/false)
1122 .intersectWith(ComputeForArm(I->getOperand(2), /*Invert=*/true));
1123 break;
1124 }
1125 case Instruction::FPTrunc:
1126 case Instruction::FPExt:
1127 case Instruction::FPToUI:
1128 case Instruction::FPToSI:
1129 case Instruction::SIToFP:
1130 case Instruction::UIToFP:
1131 break; // Can't work with floating point.
1132 case Instruction::PtrToInt:
1133 case Instruction::IntToPtr:
1134 // Fall through and handle them the same as zext/trunc.
1135 [[fallthrough]];
1136 case Instruction::ZExt:
1137 case Instruction::Trunc: {
1138 Type *SrcTy = I->getOperand(0)->getType();
1139
1140 unsigned SrcBitWidth;
1141 // Note that we handle pointer operands here because of inttoptr/ptrtoint
1142 // which fall through here.
1143 Type *ScalarTy = SrcTy->getScalarType();
1144 SrcBitWidth = ScalarTy->isPointerTy() ?
1145 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
1146 Q.DL.getTypeSizeInBits(ScalarTy);
1147
1148 assert(SrcBitWidth && "SrcBitWidth can't be zero");
1149 Known = Known.anyextOrTrunc(SrcBitWidth);
1150 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1151 if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
1152 Inst && Inst->hasNonNeg() && !Known.isNegative())
1153 Known.makeNonNegative();
1154 Known = Known.zextOrTrunc(BitWidth);
1155 break;
1156 }
1157 case Instruction::BitCast: {
1158 Type *SrcTy = I->getOperand(0)->getType();
1159 if (SrcTy->isIntOrPtrTy() &&
1160 // TODO: For now, not handling conversions like:
1161 // (bitcast i64 %x to <2 x i32>)
1162 !I->getType()->isVectorTy()) {
1163 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1164 break;
1165 }
1166
1167 const Value *V;
1168 // Handle bitcast from floating point to integer.
1169 if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
1170 V->getType()->isFPOrFPVectorTy()) {
1171 Type *FPType = V->getType()->getScalarType();
1172 KnownFPClass Result =
1173 computeKnownFPClass(V, DemandedElts, fcAllFlags, Depth + 1, Q);
1174 FPClassTest FPClasses = Result.KnownFPClasses;
1175
1176 // TODO: Treat it as zero/poison if the use of I is unreachable.
1177 if (FPClasses == fcNone)
1178 break;
1179
1180 if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
1181 Known.Zero.setAllBits();
1182 Known.One.setAllBits();
1183
1184 if (FPClasses & fcInf)
1185 Known = Known.intersectWith(KnownBits::makeConstant(
1186 APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
1187
1188 if (FPClasses & fcZero)
1189 Known = Known.intersectWith(KnownBits::makeConstant(
1190 APInt::getZero(FPType->getScalarSizeInBits())));
1191
1192 Known.Zero.clearSignBit();
1193 Known.One.clearSignBit();
1194 }
1195
1196 if (Result.SignBit) {
1197 if (*Result.SignBit)
1198 Known.makeNegative();
1199 else
1200 Known.makeNonNegative();
1201 }
1202
1203 break;
1204 }
1205
1206 // Handle cast from vector integer type to scalar or vector integer.
1207 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
1208 if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
1209 !I->getType()->isIntOrIntVectorTy() ||
1210 isa<ScalableVectorType>(I->getType()))
1211 break;
1212
1213 // Look through a cast from narrow vector elements to wider type.
1214 // Examples: v4i32 -> v2i64, v3i8 -> i24
1215 unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
1216 if (BitWidth % SubBitWidth == 0) {
1217 // Known bits are automatically intersected across demanded elements of a
1218 // vector. So for example, if a bit is computed as known zero, it must be
1219 // zero across all demanded elements of the vector.
1220 //
1221 // For this bitcast, each demanded element of the output is sub-divided
1222 // across a set of smaller vector elements in the source vector. To get
1223 // the known bits for an entire element of the output, compute the known
1224 // bits for each sub-element sequentially. This is done by shifting the
1225 // one-set-bit demanded elements parameter across the sub-elements for
1226 // consecutive calls to computeKnownBits. We are using the demanded
1227 // elements parameter as a mask operator.
1228 //
1229 // The known bits of each sub-element are then inserted into place
1230 // (dependent on endian) to form the full result of known bits.
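 // For example, for a little-endian bitcast of <4 x i8> to <2 x i16>, output
 // element 0 takes source element 0 in bits 7..0 and source element 1 in
 // bits 15..8; on a big-endian target the two source elements swap places.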
1231 unsigned NumElts = DemandedElts.getBitWidth();
1232 unsigned SubScale = BitWidth / SubBitWidth;
1233 APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
1234 for (unsigned i = 0; i != NumElts; ++i) {
1235 if (DemandedElts[i])
1236 SubDemandedElts.setBit(i * SubScale);
1237 }
1238
1239 KnownBits KnownSrc(SubBitWidth);
1240 for (unsigned i = 0; i != SubScale; ++i) {
1241 computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
1242 Depth + 1, Q);
1243 unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1244 Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
1245 }
1246 }
1247 break;
1248 }
1249 case Instruction::SExt: {
1250 // Compute the bits in the result that are not present in the input.
1251 unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1252
1253 Known = Known.trunc(SrcBitWidth);
1254 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1255 // If the sign bit of the input is known set or clear, then we know the
1256 // top bits of the result.
1257 Known = Known.sext(BitWidth);
1258 break;
1259 }
1260 case Instruction::Shl: {
1261 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1262 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1263 auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1264 bool ShAmtNonZero) {
1265 return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
1266 };
1267 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1268 KF);
1269 // Trailing zeros of a left-shifted constant never decrease.
1270 const APInt *C;
1271 if (match(I->getOperand(0), m_APInt(C)))
1272 Known.Zero.setLowBits(C->countr_zero());
1273 break;
1274 }
1275 case Instruction::LShr: {
1276 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1277 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1278 bool ShAmtNonZero) {
1279 return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1280 };
1281 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1282 KF);
1283 // Leading zeros of a right-shifted constant never decrease.
1284 const APInt *C;
1285 if (match(I->getOperand(0), m_APInt(C)))
1286 Known.Zero.setHighBits(C->countl_zero());
1287 break;
1288 }
1289 case Instruction::AShr: {
1290 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1291 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1292 bool ShAmtNonZero) {
1293 return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1294 };
1295 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1296 KF);
1297 break;
1298 }
1299 case Instruction::Sub: {
1300 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1301 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1302 computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, NUW,
1303 DemandedElts, Known, Known2, Depth, Q);
1304 break;
1305 }
1306 case Instruction::Add: {
1307 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1308 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1309 computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, NUW,
1310 DemandedElts, Known, Known2, Depth, Q);
1311 break;
1312 }
1313 case Instruction::SRem:
1314 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1315 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1316 Known = KnownBits::srem(Known, Known2);
1317 break;
1318
1319 case Instruction::URem:
1320 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1321 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1322 Known = KnownBits::urem(Known, Known2);
1323 break;
1324 case Instruction::Alloca:
1325 Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
1326 break;
1327 case Instruction::GetElementPtr: {
1328 // Analyze all of the subscripts of this getelementptr instruction
1329 // to determine if we can prove known low zero bits.
1330 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1331 // Accumulate the constant indices in a separate variable
1332 // to minimize the number of calls to computeForAddSub.
1333 APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
1334
1335 gep_type_iterator GTI = gep_type_begin(I);
1336 for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1337 // TrailZ can only become smaller, short-circuit if we hit zero.
1338 if (Known.isUnknown())
1339 break;
1340
1341 Value *Index = I->getOperand(i);
1342
1343 // Handle case when index is zero.
1344 Constant *CIndex = dyn_cast<Constant>(Index);
1345 if (CIndex && CIndex->isZeroValue())
1346 continue;
1347
1348 if (StructType *STy = GTI.getStructTypeOrNull()) {
1349 // Handle struct member offset arithmetic.
1350
1351 assert(CIndex &&
1352 "Access to structure field must be known at compile time");
1353
1354 if (CIndex->getType()->isVectorTy())
1355 Index = CIndex->getSplatValue();
1356
1357 unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1358 const StructLayout *SL = Q.DL.getStructLayout(STy);
1359 uint64_t Offset = SL->getElementOffset(Idx);
1360 AccConstIndices += Offset;
1361 continue;
1362 }
1363
1364 // Handle array index arithmetic.
1365 Type *IndexedTy = GTI.getIndexedType();
1366 if (!IndexedTy->isSized()) {
1367 Known.resetAll();
1368 break;
1369 }
1370
1371 unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
1372 KnownBits IndexBits(IndexBitWidth);
1373 computeKnownBits(Index, IndexBits, Depth + 1, Q);
1374 TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
1375 uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
1376 KnownBits ScalingFactor(IndexBitWidth);
1377 // Multiply by current sizeof type.
1378 // &A[i] == A + i * sizeof(*A[i]).
1379 if (IndexTypeSize.isScalable()) {
1380 // For scalable types the only thing we know about sizeof is
1381 // that this is a multiple of the minimum size.
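 // For example, an element type of <vscale x 4 x i32> has a minimum store
 // size of 16 bytes, so the scaled index is known to have its low 4 bits
 // clear.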
1382 ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
1383 } else if (IndexBits.isConstant()) {
1384 APInt IndexConst = IndexBits.getConstant();
1385 APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
1386 IndexConst *= ScalingFactor;
1387 AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
1388 continue;
1389 } else {
1390 ScalingFactor =
1391 KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
1392 }
1393 IndexBits = KnownBits::mul(IndexBits, ScalingFactor);
1394
1395 // If the offsets have a different width from the pointer, according
1396 // to the language reference we need to sign-extend or truncate them
1397 // to the width of the pointer.
1398 IndexBits = IndexBits.sextOrTrunc(BitWidth);
1399
1400 // Note that inbounds does *not* guarantee nsw for the addition, as only
1401 // the offset is signed, while the base address is unsigned.
1402 Known = KnownBits::add(Known, IndexBits);
1403 }
1404 if (!Known.isUnknown() && !AccConstIndices.isZero()) {
1405 KnownBits Index = KnownBits::makeConstant(AccConstIndices);
1406 Known = KnownBits::add(Known, Index);
1407 }
1408 break;
1409 }
1410 case Instruction::PHI: {
1411 const PHINode *P = cast<PHINode>(I);
1412 BinaryOperator *BO = nullptr;
1413 Value *R = nullptr, *L = nullptr;
1414 if (matchSimpleRecurrence(P, BO, R, L)) {
1415 // Handle the case of a simple two-predecessor recurrence PHI.
1416 // There's a lot more that could theoretically be done here, but
1417 // this is sufficient to catch some interesting cases.
1418 unsigned Opcode = BO->getOpcode();
1419
1420 // If this is a shift recurrence, we know the bits being shifted in.
1421 // We can combine that with information about the start value of the
1422 // recurrence to conclude facts about the result.
1423 if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr ||
1424 Opcode == Instruction::Shl) &&
1425 BO->getOperand(0) == I) {
1426
1427 // We have matched a recurrence of the form:
1428 // %iv = [R, %entry], [%iv.next, %backedge]
1429 // %iv.next = shift_op %iv, L
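 // For example, if R (the start value) is known to have its low 4 bits
 // clear and the recurrence is a shl, every value of %iv keeps those low
 // 4 zero bits, regardless of the (unknown) shift amount per iteration.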
1430
1431 // Recurse with the phi context to avoid concern about whether facts
1432 // inferred hold at original context instruction. TODO: It may be
1433 // correct to use the original context. IF warranted, explore and
1434 // add sufficient tests to cover.
1435 SimplifyQuery RecQ = Q.getWithoutCondContext();
1436 RecQ.CxtI = P;
1437 computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1438 switch (Opcode) {
1439 case Instruction::Shl:
1440 // A shl recurrence will only increase the trailing zeros
1441 Known.Zero.setLowBits(Known2.countMinTrailingZeros());
1442 break;
1443 case Instruction::LShr:
1444 // A lshr recurrence will preserve the leading zeros of the
1445 // start value
1446 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1447 break;
1448 case Instruction::AShr:
1449 // An ashr recurrence will extend the initial sign bit
1450 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1451 Known.One.setHighBits(Known2.countMinLeadingOnes());
1452 break;
1453 };
1454 }
1455
1456 // Check for operations that have the property that if
1457 // both their operands have low zero bits, the result
1458 // will have low zero bits.
1459 if (Opcode == Instruction::Add ||
1460 Opcode == Instruction::Sub ||
1461 Opcode == Instruction::And ||
1462 Opcode == Instruction::Or ||
1463 Opcode == Instruction::Mul) {
1464 // Change the context instruction to the "edge" that flows into the
1465 // phi. This is important because that is where the value is actually
1466 // "evaluated" even though it is used later somewhere else. (see also
1467 // D69571).
1468 SimplifyQuery RecQ = Q.getWithoutCondContext();
1469
1470 unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
1471 Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
1472 Instruction *LInst = P->getIncomingBlock(1 - OpNum)->getTerminator();
1473
1474 // Ok, we have a PHI of the form L op= R. Check for low
1475 // zero bits.
1476 RecQ.CxtI = RInst;
1477 computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1478
1479 // We need to take the minimum number of known bits
1480 KnownBits Known3(BitWidth);
1481 RecQ.CxtI = LInst;
1482 computeKnownBits(L, DemandedElts, Known3, Depth + 1, RecQ);
1483
1484 Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
1485 Known3.countMinTrailingZeros()));
1486
1487 auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
1488 if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
1489 // If initial value of recurrence is nonnegative, and we are adding
1490 // a nonnegative number with nsw, the result can only be nonnegative
1491 // or poison value regardless of the number of times we execute the
1492 // add in phi recurrence. If initial value is negative and we are
1493 // adding a negative number with nsw, the result can only be
1494 // negative or poison value. Similar arguments apply to sub and mul.
1495 //
1496 // (add non-negative, non-negative) --> non-negative
1497 // (add negative, negative) --> negative
1498 if (Opcode == Instruction::Add) {
1499 if (Known2.isNonNegative() && Known3.isNonNegative())
1500 Known.makeNonNegative();
1501 else if (Known2.isNegative() && Known3.isNegative())
1502 Known.makeNegative();
1503 }
1504
1505 // (sub nsw non-negative, negative) --> non-negative
1506 // (sub nsw negative, non-negative) --> negative
1507 else if (Opcode == Instruction::Sub && BO->getOperand(0) == I) {
1508 if (Known2.isNonNegative() && Known3.isNegative())
1509 Known.makeNonNegative();
1510 else if (Known2.isNegative() && Known3.isNonNegative())
1511 Known.makeNegative();
1512 }
1513
1514 // (mul nsw non-negative, non-negative) --> non-negative
1515 else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
1516 Known3.isNonNegative())
1517 Known.makeNonNegative();
1518 }
1519
1520 break;
1521 }
1522 }
1523
1524 // Unreachable blocks may have zero-operand PHI nodes.
1525 if (P->getNumIncomingValues() == 0)
1526 break;
1527
1528 // Otherwise take the unions of the known bit sets of the operands,
1529 // taking conservative care to avoid excessive recursion.
1530 if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
1531 // Skip if every incoming value references the phi itself.
1532 if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
1533 break;
1534
1535 Known.Zero.setAllBits();
1536 Known.One.setAllBits();
1537 for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
1538 Value *IncValue = P->getIncomingValue(u);
1539 // Skip direct self references.
1540 if (IncValue == P) continue;
1541
1542 // Change the context instruction to the "edge" that flows into the
1543 // phi. This is important because that is where the value is actually
1544 // "evaluated" even though it is used later somewhere else. (see also
1545 // D69571).
1546 SimplifyQuery RecQ = Q.getWithoutCondContext();
1547 RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
1548
1549 Known2 = KnownBits(BitWidth);
1550
1551 // Recurse, but cap the recursion to one level, because we don't
1552 // want to waste time spinning around in loops.
1553 // TODO: See if we can base recursion limiter on number of incoming phi
1554 // edges so we don't overly clamp analysis.
1555 computeKnownBits(IncValue, DemandedElts, Known2,
1556 MaxAnalysisRecursionDepth - 1, RecQ);
1557
1558 // See if we can further use a conditional branch into the phi
1559 // to help us determine the range of the value.
1560 if (!Known2.isConstant()) {
1561 ICmpInst::Predicate Pred;
1562 const APInt *RHSC;
1563 BasicBlock *TrueSucc, *FalseSucc;
1564 // TODO: Use RHS Value and compute range from its known bits.
1565 if (match(RecQ.CxtI,
1566 m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
1567 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
1568 // Check for cases of duplicate successors.
1569 if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
1570 // If we're using the false successor, invert the predicate.
1571 if (FalseSucc == P->getParent())
1572 Pred = CmpInst::getInversePredicate(Pred);
1573 // Get the knownbits implied by the incoming phi condition.
1574 auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
1575 KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
1576 // We can have conflicts here if we are analyzing dead code (it's
1577 // impossible for us to reach this BB based on the icmp).
1578 if (KnownUnion.hasConflict()) {
1579 // No reason to continue analyzing in a known dead region, so
1580 // just resetAll and break. This will cause us to also exit the
1581 // outer loop.
1582 Known.resetAll();
1583 break;
1584 }
1585 Known2 = KnownUnion;
1586 }
1587 }
1588 }
1589
1590 Known = Known.intersectWith(Known2);
1591 // If all bits have been ruled out, there's no need to check
1592 // more operands.
1593 if (Known.isUnknown())
1594 break;
1595 }
1596 }
1597 break;
1598 }
1599 case Instruction::Call:
1600 case Instruction::Invoke: {
1601 // If range metadata is attached to this call, set known bits from that,
1602 // and then intersect with known bits based on other properties of the
1603 // function.
1604 if (MDNode *MD =
1605 Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
1606 computeKnownBitsFromRangeMetadata(*MD, Known);
1607
1608 const auto *CB = cast<CallBase>(I);
1609
1610 if (std::optional<ConstantRange> Range = CB->getRange())
1611 Known = Known.unionWith(Range->toKnownBits());
1612
1613 if (const Value *RV = CB->getReturnedArgOperand()) {
1614 if (RV->getType() == I->getType()) {
1615 computeKnownBits(RV, Known2, Depth + 1, Q);
1616 Known = Known.unionWith(Known2);
1617 // If the function doesn't return properly for all input values
1618 // (e.g. unreachable exits) then there might be conflicts between the
1619 // argument value and the range metadata. Simply discard the known bits
1620 // in case of conflicts.
1621 if (Known.hasConflict())
1622 Known.resetAll();
1623 }
1624 }
1625 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1626 switch (II->getIntrinsicID()) {
1627 default:
1628 break;
1629 case Intrinsic::abs: {
1630 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1631 bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
1632 Known = Known2.abs(IntMinIsPoison);
1633 break;
1634 }
1635 case Intrinsic::bitreverse:
1636 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1637 Known.Zero |= Known2.Zero.reverseBits();
1638 Known.One |= Known2.One.reverseBits();
1639 break;
1640 case Intrinsic::bswap:
1641 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1642 Known.Zero |= Known2.Zero.byteSwap();
1643 Known.One |= Known2.One.byteSwap();
1644 break;
1645 case Intrinsic::ctlz: {
1646 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1647 // If we have a known 1, its position is our upper bound.
1648 unsigned PossibleLZ = Known2.countMaxLeadingZeros();
1649 // If this call is poison for 0 input, the result will be less than 2^n.
1650 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1651 PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1652 unsigned LowBits = llvm::bit_width(PossibleLZ);
1653 Known.Zero.setBitsFrom(LowBits);
1654 break;
1655 }
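// For illustration of the bound above (a sketch, not original source text):
// with a 32-bit ctlz whose operand has bit 28 known to be one,
// countMaxLeadingZeros() is 3, so the result is at most 3; bit_width(3) == 2,
// so every result bit from position 2 upward is known zero.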
1656 case Intrinsic::cttz: {
1657 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1658 // If we have a known 1, its position is our upper bound.
1659 unsigned PossibleTZ = Known2.countMaxTrailingZeros();
1660 // If this call is poison for 0 input, the result will be less than 2^n.
1661 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1662 PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1663 unsigned LowBits = llvm::bit_width(PossibleTZ);
1664 Known.Zero.setBitsFrom(LowBits);
1665 break;
1666 }
1667 case Intrinsic::ctpop: {
1668 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1669 // We can bound the space the count needs. Also, bits known to be zero
1670 // can't contribute to the population.
1671 unsigned BitsPossiblySet = Known2.countMaxPopulation();
1672 unsigned LowBits = llvm::bit_width(BitsPossiblySet);
1673 Known.Zero.setBitsFrom(LowBits);
1674 // TODO: we could bound KnownOne using the lower bound on the number
1675 // of bits which might be set provided by popcnt KnownOne2.
1676 break;
1677 }
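// For illustration: if at most 10 bits of the operand can possibly be set
// (countMaxPopulation() == 10), then bit_width(10) == 4, so the ctpop result
// fits in its low 4 bits and every bit above them is known zero.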
1678 case Intrinsic::fshr:
1679 case Intrinsic::fshl: {
1680 const APInt *SA;
1681 if (!match(I->getOperand(2), m_APInt(SA)))
1682 break;
1683
1684 // Normalize to funnel shift left.
1685 uint64_t ShiftAmt = SA->urem(BitWidth);
1686 if (II->getIntrinsicID() == Intrinsic::fshr)
1687 ShiftAmt = BitWidth - ShiftAmt;
1688
1689 KnownBits Known3(BitWidth);
1690 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1691 computeKnownBits(I->getOperand(1), DemandedElts, Known3, Depth + 1, Q);
1692
1693 Known.Zero =
1694 Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
1695 Known.One =
1696 Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
1697 break;
1698 }
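// For illustration of the normalization above: with BitWidth == 8, fshr by 3
// becomes fshl by 5, i.e. (Op0 << 5) | (Op1 >> 3); the result's top 3 bits
// come from the low 3 bits of Op0 and its low 5 bits from the high 5 bits of
// Op1, and the Zero/One masks are combined in exactly the same way.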
1699 case Intrinsic::uadd_sat:
1700 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1701 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1702 Known = KnownBits::uadd_sat(Known, Known2);
1703 break;
1704 case Intrinsic::usub_sat:
1705 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1706 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1707 Known = KnownBits::usub_sat(Known, Known2);
1708 break;
1709 case Intrinsic::sadd_sat:
1710 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1711 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1712 Known = KnownBits::sadd_sat(Known, Known2);
1713 break;
1714 case Intrinsic::ssub_sat:
1715 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1716 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1717 Known = KnownBits::ssub_sat(Known, Known2);
1718 break;
1719 // Vec reverse preserves bits from input vec.
1720 case Intrinsic::vector_reverse:
1721 computeKnownBits(I->getOperand(0), DemandedElts.reverseBits(), Known,
1722 Depth + 1, Q);
1723 break;
1724 // for min/max/and/or reduce, any bit common to each element in the
1725 // input vec is set in the output.
1726 case Intrinsic::vector_reduce_and:
1727 case Intrinsic::vector_reduce_or:
1728 case Intrinsic::vector_reduce_umax:
1729 case Intrinsic::vector_reduce_umin:
1730 case Intrinsic::vector_reduce_smax:
1731 case Intrinsic::vector_reduce_smin:
1732 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1733 break;
1734 case Intrinsic::vector_reduce_xor: {
1735 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1736 // The zeros common to all elements are zero in the output.
1737 // If the number of elements is odd, then the common ones remain. If the
1738 // number of elements is even, then the common ones become zeros.
1739 auto *VecTy = cast<VectorType>(I->getOperand(0)->getType());
1740 // Even, so the ones become zeros.
1741 bool EvenCnt = VecTy->getElementCount().isKnownEven();
1742 if (EvenCnt)
1743 Known.Zero |= Known.One;
1744 // The element count may be even (e.g. for scalable vectors), so clear the ones.
1745 if (VecTy->isScalableTy() || EvenCnt)
1746 Known.One.clearAllBits();
1747 break;
1748 }
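// For illustration: xor-reducing a <4 x i8> whose elements all have bit 7
// known zero and bit 0 known one keeps bit 7 as a known zero; because the
// element count (4) is even, the common one in bit 0 cancels out and becomes
// a known zero as well.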
1749 case Intrinsic::umin:
1750 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1751 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1752 Known = KnownBits::umin(Known, Known2);
1753 break;
1754 case Intrinsic::umax:
1755 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1756 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1757 Known = KnownBits::umax(Known, Known2);
1758 break;
1759 case Intrinsic::smin:
1760 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1761 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1762 Known = KnownBits::smin(Known, Known2);
1763 break;
1764 case Intrinsic::smax:
1765 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1766 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1767 Known = KnownBits::smax(Known, Known2);
1768 break;
1769 case Intrinsic::ptrmask: {
1770 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1771
1772 const Value *Mask = I->getOperand(1);
1773 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
1774 computeKnownBits(Mask, DemandedElts, Known2, Depth + 1, Q);
1775 // TODO: 1-extend would be more precise.
1776 Known &= Known2.anyextOrTrunc(BitWidth);
1777 break;
1778 }
1779 case Intrinsic::x86_sse2_pmulh_w:
1780 case Intrinsic::x86_avx2_pmulh_w:
1781 case Intrinsic::x86_avx512_pmulh_w_512:
1782 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1783 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1784 Known = KnownBits::mulhs(Known, Known2);
1785 break;
1786 case Intrinsic::x86_sse2_pmulhu_w:
1787 case Intrinsic::x86_avx2_pmulhu_w:
1788 case Intrinsic::x86_avx512_pmulhu_w_512:
1789 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1790 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1791 Known = KnownBits::mulhu(Known, Known2);
1792 break;
1793 case Intrinsic::x86_sse42_crc32_64_64:
1794 Known.Zero.setBitsFrom(32);
1795 break;
1796 case Intrinsic::x86_ssse3_phadd_d_128:
1797 case Intrinsic::x86_ssse3_phadd_w_128:
1798 case Intrinsic::x86_avx2_phadd_d:
1799 case Intrinsic::x86_avx2_phadd_w: {
1800 Known = computeKnownBitsForHorizontalOperation(
1801 I, DemandedElts, Depth, Q,
1802 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1803 return KnownBits::add(KnownLHS, KnownRHS);
1804 });
1805 break;
1806 }
1807 case Intrinsic::x86_ssse3_phadd_sw_128:
1808 case Intrinsic::x86_avx2_phadd_sw: {
1809 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
1810 Q, KnownBits::sadd_sat);
1811 break;
1812 }
1813 case Intrinsic::x86_ssse3_phsub_d_128:
1814 case Intrinsic::x86_ssse3_phsub_w_128:
1815 case Intrinsic::x86_avx2_phsub_d:
1816 case Intrinsic::x86_avx2_phsub_w: {
1817 Known = computeKnownBitsForHorizontalOperation(
1818 I, DemandedElts, Depth, Q,
1819 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1820 return KnownBits::sub(KnownLHS, KnownRHS);
1821 });
1822 break;
1823 }
1824 case Intrinsic::x86_ssse3_phsub_sw_128:
1825 case Intrinsic::x86_avx2_phsub_sw: {
1826 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
1827 Q, KnownBits::ssub_sat);
1828 break;
1829 }
1830 case Intrinsic::riscv_vsetvli:
1831 case Intrinsic::riscv_vsetvlimax: {
1832 bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
1833 const ConstantRange Range = getVScaleRange(II->getFunction(), BitWidth);
1834 uint64_t SEW = RISCVVType::decodeVSEW(
1835 cast<ConstantInt>(II->getArgOperand(HasAVL))->getZExtValue());
1836 RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(
1837 cast<ConstantInt>(II->getArgOperand(1 + HasAVL))->getZExtValue());
1838 uint64_t MaxVLEN =
1839 Range.getUnsignedMax().getZExtValue() * RISCV::RVVBitsPerBlock;
1840 uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMUL);
1841
1842 // The result of vsetvli must not be larger than AVL.
1843 if (HasAVL)
1844 if (auto *CI = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1845 MaxVL = std::min(MaxVL, CI->getZExtValue());
1846
1847 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
1848 if (BitWidth > KnownZeroFirstBit)
1849 Known.Zero.setBitsFrom(KnownZeroFirstBit);
1850 break;
1851 }
1852 case Intrinsic::vscale: {
1853 if (!II->getParent() || !II->getFunction())
1854 break;
1855
1856 Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits();
1857 break;
1858 }
1859 }
1860 }
1861 break;
1862 }
1863 case Instruction::ShuffleVector: {
1864 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
1865 // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
1866 if (!Shuf) {
1867 Known.resetAll();
1868 return;
1869 }
1870 // For undef elements, we don't know anything about the common state of
1871 // the shuffle result.
1872 APInt DemandedLHS, DemandedRHS;
1873 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
1874 Known.resetAll();
1875 return;
1876 }
1877 Known.One.setAllBits();
1878 Known.Zero.setAllBits();
1879 if (!!DemandedLHS) {
1880 const Value *LHS = Shuf->getOperand(0);
1881 computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
1882 // If we don't know any bits, early out.
1883 if (Known.isUnknown())
1884 break;
1885 }
1886 if (!!DemandedRHS) {
1887 const Value *RHS = Shuf->getOperand(1);
1888 computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
1889 Known = Known.intersectWith(Known2);
1890 }
1891 break;
1892 }
1893 case Instruction::InsertElement: {
1894 if (isa<ScalableVectorType>(I->getType())) {
1895 Known.resetAll();
1896 return;
1897 }
1898 const Value *Vec = I->getOperand(0);
1899 const Value *Elt = I->getOperand(1);
1900 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
1901 unsigned NumElts = DemandedElts.getBitWidth();
1902 APInt DemandedVecElts = DemandedElts;
1903 bool NeedsElt = true;
1904 // If we know the index we are inserting to, clear that bit from the Vec check.
1905 if (CIdx && CIdx->getValue().ult(NumElts)) {
1906 DemandedVecElts.clearBit(CIdx->getZExtValue());
1907 NeedsElt = DemandedElts[CIdx->getZExtValue()];
1908 }
1909
1910 Known.One.setAllBits();
1911 Known.Zero.setAllBits();
1912 if (NeedsElt) {
1913 computeKnownBits(Elt, Known, Depth + 1, Q);
1914 // If we don't know any bits, early out.
1915 if (Known.isUnknown())
1916 break;
1917 }
1918
1919 if (!DemandedVecElts.isZero()) {
1920 computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
1921 Known = Known.intersectWith(Known2);
1922 }
1923 break;
1924 }
1925 case Instruction::ExtractElement: {
1926 // Look through extract element. If the index is non-constant or
1927 // out-of-range, demand all elements; otherwise just the extracted element.
1928 const Value *Vec = I->getOperand(0);
1929 const Value *Idx = I->getOperand(1);
1930 auto *CIdx = dyn_cast<ConstantInt>(Idx);
1931 if (isa<ScalableVectorType>(Vec->getType())) {
1932 // FIXME: there's probably *something* we can do with scalable vectors
1933 Known.resetAll();
1934 break;
1935 }
1936 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1937 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
1938 if (CIdx && CIdx->getValue().ult(NumElts))
1939 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
1940 computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
1941 break;
1942 }
1943 case Instruction::ExtractValue:
1944 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
1945 const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
1946 if (EVI->getNumIndices() != 1) break;
1947 if (EVI->getIndices()[0] == 0) {
1948 switch (II->getIntrinsicID()) {
1949 default: break;
1950 case Intrinsic::uadd_with_overflow:
1951 case Intrinsic::sadd_with_overflow:
1952 computeKnownBitsAddSub(
1953 true, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
1954 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
1955 break;
1956 case Intrinsic::usub_with_overflow:
1957 case Intrinsic::ssub_with_overflow:
1958 computeKnownBitsAddSub(
1959 false, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
1960 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
1961 break;
1962 case Intrinsic::umul_with_overflow:
1963 case Intrinsic::smul_with_overflow:
1964 computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
1965 DemandedElts, Known, Known2, Depth, Q);
1966 break;
1967 }
1968 }
1969 }
1970 break;
1971 case Instruction::Freeze:
1972 if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
1973 Depth + 1))
1974 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1975 break;
1976 }
1977}
1978
1979/// Determine which bits of V are known to be either zero or one and return
1980/// them.
1981KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
1982 unsigned Depth, const SimplifyQuery &Q) {
1983 KnownBits Known(getBitWidth(V->getType(), Q.DL));
1984 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
1985 return Known;
1986}
1987
1988/// Determine which bits of V are known to be either zero or one and return
1989 /// them.
1990 KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth,
1991 const SimplifyQuery &Q) {
1992 KnownBits Known(getBitWidth(V->getType(), Q.DL));
1993 computeKnownBits(V, Known, Depth, Q);
1994 return Known;
1995}
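// A minimal usage sketch of the wrapper above (illustrative only; the value V,
// the DataLayout DL and the calling pass are assumed, not part of this file):
//   KnownBits KB = computeKnownBits(V, /*Depth=*/0, SimplifyQuery(DL));
//   if (KB.countMinTrailingZeros() >= 2)
//     ...; // V is known to be a multiple of 4.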
1996
1997/// Determine which bits of V are known to be either zero or one and return
1998/// them in the Known bit set.
1999///
2000/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
2001/// we cannot optimize based on the assumption that it is zero without changing
2002/// it to be an explicit zero. If we don't change it to zero, other code could
2003 /// be optimized based on the contradictory assumption that it is non-zero.
2004/// Because instcombine aggressively folds operations with undef args anyway,
2005/// this won't lose us code quality.
2006///
2007/// This function is defined on values with integer type, values with pointer
2008/// type, and vectors of integers. In the case
2009 /// where V is a vector, the known zero and known one values are the
2010 /// same width as the vector element, and a bit is set only if it is true
2011/// for all of the demanded elements in the vector specified by DemandedElts.
2012void computeKnownBits(const Value *V, const APInt &DemandedElts,
2013 KnownBits &Known, unsigned Depth,
2014 const SimplifyQuery &Q) {
2015 if (!DemandedElts) {
2016 // No demanded elts, better to assume we don't know anything.
2017 Known.resetAll();
2018 return;
2019 }
2020
2021 assert(V && "No Value?");
2022 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2023
2024#ifndef NDEBUG
2025 Type *Ty = V->getType();
2026 unsigned BitWidth = Known.getBitWidth();
2027
2028 assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
2029 "Not integer or pointer type!");
2030
2031 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
2032 assert(
2033 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
2034 "DemandedElt width should equal the fixed vector number of elements");
2035 } else {
2036 assert(DemandedElts == APInt(1, 1) &&
2037 "DemandedElt width should be 1 for scalars or scalable vectors");
2038 }
2039
2040 Type *ScalarTy = Ty->getScalarType();
2041 if (ScalarTy->isPointerTy()) {
2042 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
2043 "V and Known should have same BitWidth");
2044 } else {
2045 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
2046 "V and Known should have same BitWidth");
2047 }
2048#endif
2049
2050 const APInt *C;
2051 if (match(V, m_APInt(C))) {
2052 // We know all of the bits for a scalar constant or a splat vector constant!
2053 Known = KnownBits::makeConstant(*C);
2054 return;
2055 }
2056 // Null and aggregate-zero are all-zeros.
2057 if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
2058 Known.setAllZero();
2059 return;
2060 }
2061 // Handle a constant vector by taking the intersection of the known bits of
2062 // each element.
2063 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
2064 assert(!isa<ScalableVectorType>(V->getType()));
2065 // We know that CDV must be a vector of integers. Take the intersection of
2066 // each element.
2067 Known.Zero.setAllBits(); Known.One.setAllBits();
2068 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
2069 if (!DemandedElts[i])
2070 continue;
2071 APInt Elt = CDV->getElementAsAPInt(i);
2072 Known.Zero &= ~Elt;
2073 Known.One &= Elt;
2074 }
2075 if (Known.hasConflict())
2076 Known.resetAll();
2077 return;
2078 }
2079
2080 if (const auto *CV = dyn_cast<ConstantVector>(V)) {
2081 assert(!isa<ScalableVectorType>(V->getType()));
2082 // We know that CV must be a vector of integers. Take the intersection of
2083 // each element.
2084 Known.Zero.setAllBits(); Known.One.setAllBits();
2085 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
2086 if (!DemandedElts[i])
2087 continue;
2088 Constant *Element = CV->getAggregateElement(i);
2089 if (isa<PoisonValue>(Element))
2090 continue;
2091 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
2092 if (!ElementCI) {
2093 Known.resetAll();
2094 return;
2095 }
2096 const APInt &Elt = ElementCI->getValue();
2097 Known.Zero &= ~Elt;
2098 Known.One &= Elt;
2099 }
2100 if (Known.hasConflict())
2101 Known.resetAll();
2102 return;
2103 }
2104
2105 // Start out not knowing anything.
2106 Known.resetAll();
2107
2108 // We can't imply anything about undefs.
2109 if (isa<UndefValue>(V))
2110 return;
2111
2112 // There's no point in looking through other users of ConstantData for
2113 // assumptions. Confirm that we've handled them all.
2114 assert(!isa<ConstantData>(V) && "Unhandled constant data!");
2115
2116 if (const auto *A = dyn_cast<Argument>(V))
2117 if (std::optional<ConstantRange> Range = A->getRange())
2118 Known = Range->toKnownBits();
2119
2120 // All recursive calls that increase depth must come after this.
2121 if (Depth == MaxAnalysisRecursionDepth)
2122 return;
2123
2124 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
2125 // the bits of its aliasee.
2126 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2127 if (!GA->isInterposable())
2128 computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
2129 return;
2130 }
2131
2132 if (const Operator *I = dyn_cast<Operator>(V))
2133 computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
2134 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
2135 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
2136 Known = CR->toKnownBits();
2137 }
2138
2139 // Aligned pointers have trailing zeros - refine Known.Zero set
2140 if (isa<PointerType>(V->getType())) {
2141 Align Alignment = V->getPointerAlignment(Q.DL);
2142 Known.Zero.setLowBits(Log2(Alignment));
2143 }
2144
2145 // computeKnownBitsFromContext strictly refines Known.
2146 // Therefore, we run it after computeKnownBitsFromOperator.
2147
2148 // Check whether we can determine known bits from context such as assumes.
2149 computeKnownBitsFromContext(V, Known, Depth, Q);
2150}
2151
2152 /// Try to detect a recurrence in which the value of the induction variable is
2153/// always a power of two (or zero).
2154static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
2155 unsigned Depth, SimplifyQuery &Q) {
2156 BinaryOperator *BO = nullptr;
2157 Value *Start = nullptr, *Step = nullptr;
2158 if (!matchSimpleRecurrence(PN, BO, Start, Step))
2159 return false;
2160
2161 // Initial value must be a power of two.
2162 for (const Use &U : PN->operands()) {
2163 if (U.get() == Start) {
2164 // Initial value comes from a different BB, need to adjust context
2165 // instruction for analysis.
2166 Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
2167 if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
2168 return false;
2169 }
2170 }
2171
2172 // Except for Mul, the induction variable must be on the left side of the
2173 // increment expression, otherwise its value can be arbitrary.
2174 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
2175 return false;
2176
2177 Q.CxtI = BO->getParent()->getTerminator();
2178 switch (BO->getOpcode()) {
2179 case Instruction::Mul:
2180 // Power of two is closed under multiplication.
2181 return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
2182 Q.IIQ.hasNoSignedWrap(BO)) &&
2183 isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
2184 case Instruction::SDiv:
2185 // Start value must not be signmask for signed division, so simply being a
2186 // power of two is not sufficient, and it has to be a constant.
2187 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2188 return false;
2189 [[fallthrough]];
2190 case Instruction::UDiv:
2191 // Divisor must be a power of two.
2192 // If OrZero is false, we cannot guarantee the induction variable is non-zero
2193 // after division (same for Shr), unless it is an exact division.
2194 return (OrZero || Q.IIQ.isExact(BO)) &&
2195 isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
2196 case Instruction::Shl:
2197 return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
2198 case Instruction::AShr:
2199 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2200 return false;
2201 [[fallthrough]];
2202 case Instruction::LShr:
2203 return OrZero || Q.IIQ.isExact(BO);
2204 default:
2205 return false;
2206 }
2207}
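// A hypothetical IR shape that isPowerOfTwoRecurrence accepts:
//   %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
//   %iv.next = shl nuw i32 %iv, 1
// The start value 1 is a power of two and, because the shift cannot wrap
// (nuw), every step keeps the value a power of two.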
2208
2209/// Return true if the given value is known to have exactly one
2210/// bit set when defined. For vectors return true if every element is known to
2211/// be a power of two when defined. Supports values with integer or pointer
2212/// types and vectors of integers.
2213bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
2214 const SimplifyQuery &Q) {
2215 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2216
2217 if (isa<Constant>(V))
2218 return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
2219
2220 // i1 is by definition a power of 2 or zero.
2221 if (OrZero && V->getType()->getScalarSizeInBits() == 1)
2222 return true;
2223
2224 auto *I = dyn_cast<Instruction>(V);
2225 if (!I)
2226 return false;
2227
2228 if (Q.CxtI && match(V, m_VScale())) {
2229 const Function *F = Q.CxtI->getFunction();
2230 // The vscale_range indicates vscale is a power-of-two.
2231 return F->hasFnAttribute(Attribute::VScaleRange);
2232 }
2233
2234 // 1 << X is clearly a power of two if the one is not shifted off the end. If
2235 // it is shifted off the end then the result is undefined.
2236 if (match(I, m_Shl(m_One(), m_Value())))
2237 return true;
2238
2239 // (signmask) >>l X is clearly a power of two if the one is not shifted off
2240 // the bottom. If it is shifted off the bottom then the result is undefined.
2241 if (match(I, m_LShr(m_SignMask(), m_Value())))
2242 return true;
2243
2244 // The remaining tests are all recursive, so bail out if we hit the limit.
2245 if (Depth++ == MaxAnalysisRecursionDepth)
2246 return false;
2247
2248 switch (I->getOpcode()) {
2249 case Instruction::ZExt:
2250 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2251 case Instruction::Trunc:
2252 return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2253 case Instruction::Shl:
2254 if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I))
2255 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2256 return false;
2257 case Instruction::LShr:
2258 if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I)))
2259 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2260 return false;
2261 case Instruction::UDiv:
2262 if (Q.IIQ.isExact(cast<BinaryOperator>(I)))
2263 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2264 return false;
2265 case Instruction::Mul:
2266 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2267 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) &&
2268 (OrZero || isKnownNonZero(I, Q, Depth));
2269 case Instruction::And:
2270 // A power of two and'd with anything is a power of two or zero.
2271 if (OrZero &&
2272 (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) ||
2273 isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q)))
2274 return true;
2275 // X & (-X) is always a power of two or zero.
2276 if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) ||
2277 match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0)))))
2278 return OrZero || isKnownNonZero(I->getOperand(0), Q, Depth);
2279 return false;
2280 case Instruction::Add: {
2281 // Adding a power-of-two or zero to the same power-of-two or zero yields
2282 // either the original power-of-two, a larger power-of-two or zero.
2283 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
2284 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
2285 Q.IIQ.hasNoSignedWrap(VOBO)) {
2286 if (match(I->getOperand(0),
2287 m_c_And(m_Specific(I->getOperand(1)), m_Value())) &&
2288 isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q))
2289 return true;
2290 if (match(I->getOperand(1),
2291 m_c_And(m_Specific(I->getOperand(0)), m_Value())) &&
2292 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q))
2293 return true;
2294
2295 unsigned BitWidth = V->getType()->getScalarSizeInBits();
2296 KnownBits LHSBits(BitWidth);
2297 computeKnownBits(I->getOperand(0), LHSBits, Depth, Q);
2298
2299 KnownBits RHSBits(BitWidth);
2300 computeKnownBits(I->getOperand(1), RHSBits, Depth, Q);
2301 // If i8 V is a power of two or zero:
2302 // ZeroBits: 1 1 1 0 1 1 1 1
2303 // ~ZeroBits: 0 0 0 1 0 0 0 0
2304 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
2305 // If OrZero isn't set, we cannot give back a zero result.
2306 // Make sure either the LHS or RHS has a bit set.
2307 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
2308 return true;
2309 }
2310
2311 // LShr(UINT_MAX, Y) + 1 is a power of two (if add is nuw) or zero.
2312 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO))
2313 if (match(I, m_Add(m_LShr(m_AllOnes(), m_Value()), m_One())))
2314 return true;
2315 return false;
2316 }
2317 case Instruction::Select:
2318 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2319 isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q);
2320 case Instruction::PHI: {
2321 // A PHI node is power of two if all incoming values are power of two, or if
2322 // it is an induction variable where in each step its value is a power of
2323 // two.
2324 auto *PN = cast<PHINode>(I);
2325 SimplifyQuery RecQ = Q.getWithoutCondContext();
2326
2327 // Check if it is an induction variable and always power of two.
2328 if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
2329 return true;
2330
2331 // Recursively check all incoming values. Limit recursion to 2 levels, so
2332 // that search complexity is limited to number of operands^2.
2333 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2334 return llvm::all_of(PN->operands(), [&](const Use &U) {
2335 // Value is power of 2 if it is coming from PHI node itself by induction.
2336 if (U.get() == PN)
2337 return true;
2338
2339 // Change the context instruction to the incoming block where it is
2340 // evaluated.
2341 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2342 return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
2343 });
2344 }
2345 case Instruction::Invoke:
2346 case Instruction::Call: {
2347 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
2348 switch (II->getIntrinsicID()) {
2349 case Intrinsic::umax:
2350 case Intrinsic::smax:
2351 case Intrinsic::umin:
2352 case Intrinsic::smin:
2353 return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) &&
2354 isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2355 // bswap/bitreverse just move around bits, but don't change any 1s/0s
2356 // thus don't change pow2/non-pow2 status.
2357 case Intrinsic::bitreverse:
2358 case Intrinsic::bswap:
2359 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2360 case Intrinsic::fshr:
2361 case Intrinsic::fshl:
2362 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
2363 if (II->getArgOperand(0) == II->getArgOperand(1))
2364 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2365 break;
2366 default:
2367 break;
2368 }
2369 }
2370 return false;
2371 }
2372 default:
2373 return false;
2374 }
2375}
2376
2377/// Test whether a GEP's result is known to be non-null.
2378///
2379/// Uses properties inherent in a GEP to try to determine whether it is known
2380/// to be non-null.
2381///
2382/// Currently this routine does not support vector GEPs.
2383static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
2384 const SimplifyQuery &Q) {
2385 const Function *F = nullptr;
2386 if (const Instruction *I = dyn_cast<Instruction>(GEP))
2387 F = I->getFunction();
2388
2389 // If the gep is nuw, or is inbounds where the null pointer is not valid,
2390 // then the GEP may be null only if the base pointer is null and the offset is zero.
2391 if (!GEP->hasNoUnsignedWrap() &&
2392 !(GEP->isInBounds() &&
2393 !NullPointerIsDefined(F, GEP->getPointerAddressSpace())))
2394 return false;
2395
2396 // FIXME: Support vector-GEPs.
2397 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2398
2399 // If the base pointer is non-null, we cannot walk to a null address with an
2400 // inbounds GEP in address space zero.
2401 if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth))
2402 return true;
2403
2404 // Walk the GEP operands and see if any operand introduces a non-zero offset.
2405 // If so, then the GEP cannot produce a null pointer, as doing so would
2406 // inherently violate the inbounds contract within address space zero.
2407 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2408 GTI != GTE; ++GTI) {
2409 // Struct types are easy -- they must always be indexed by a constant.
2410 if (StructType *STy = GTI.getStructTypeOrNull()) {
2411 ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
2412 unsigned ElementIdx = OpC->getZExtValue();
2413 const StructLayout *SL = Q.DL.getStructLayout(STy);
2414 uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
2415 if (ElementOffset > 0)
2416 return true;
2417 continue;
2418 }
2419
2420 // If we have a zero-sized type, the index doesn't matter. Keep looping.
2421 if (GTI.getSequentialElementStride(Q.DL).isZero())
2422 continue;
2423
2424 // Fast path the constant operand case both for efficiency and so we don't
2425 // increment Depth when just zipping down an all-constant GEP.
2426 if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
2427 if (!OpC->isZero())
2428 return true;
2429 continue;
2430 }
2431
2432 // We post-increment Depth here because while isKnownNonZero increments it
2433 // as well, when we pop back up that increment won't persist. We don't want
2434 // to recurse 10k times just because we have 10k GEP operands. We don't
2435 // bail out completely because we want to handle constant GEPs regardless
2436 // of depth.
2437 if (Depth++ >= MaxAnalysisRecursionDepth)
2438 continue;
2439
2440 if (isKnownNonZero(GTI.getOperand(), Q, Depth))
2441 return true;
2442 }
2443
2444 return false;
2445}
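// A hypothetical example of what the walk above proves: in address space 0,
//   %p = getelementptr inbounds %struct.S, ptr %base, i64 0, i32 1
// is known non-null whenever field 1 of %struct.S sits at a non-zero offset,
// because an inbounds GEP with a non-zero offset cannot legitimately land on
// the null address when null is not a valid pointer in that address space.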
2446
2447 static bool isKnownNonNullFromDominatingCondition(const Value *V,
2448 const Instruction *CtxI,
2449 const DominatorTree *DT) {
2450 assert(!isa<Constant>(V) && "Called for constant?");
2451
2452 if (!CtxI || !DT)
2453 return false;
2454
2455 unsigned NumUsesExplored = 0;
2456 for (const auto *U : V->users()) {
2457 // Avoid massive lists
2458 if (NumUsesExplored >= DomConditionsMaxUses)
2459 break;
2460 NumUsesExplored++;
2461
2462 // If the value is used as an argument to a call or invoke, then argument
2463 // attributes may provide an answer about null-ness.
2464 if (const auto *CB = dyn_cast<CallBase>(U))
2465 if (auto *CalledFunc = CB->getCalledFunction())
2466 for (const Argument &Arg : CalledFunc->args())
2467 if (CB->getArgOperand(Arg.getArgNo()) == V &&
2468 Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) &&
2469 DT->dominates(CB, CtxI))
2470 return true;
2471
2472 // If the value is used as a load/store, then the pointer must be non null.
2473 if (V == getLoadStorePointerOperand(U)) {
2474 const Instruction *I = cast<Instruction>(U);
2475 if (!NullPointerIsDefined(I->getFunction(),
2476 V->getType()->getPointerAddressSpace()) &&
2477 DT->dominates(I, CtxI))
2478 return true;
2479 }
2480
2481 if ((match(U, m_IDiv(m_Value(), m_Specific(V))) ||
2482 match(U, m_IRem(m_Value(), m_Specific(V)))) &&
2483 isValidAssumeForContext(cast<Instruction>(U), CtxI, DT))
2484 return true;
2485
2486 // Consider only compare instructions uniquely controlling a branch
2487 Value *RHS;
2488 CmpInst::Predicate Pred;
2489 if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
2490 continue;
2491
2492 bool NonNullIfTrue;
2493 if (cmpExcludesZero(Pred, RHS))
2494 NonNullIfTrue = true;
2495 else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
2496 NonNullIfTrue = false;
2497 else
2498 continue;
2499
2500 SmallVector<const User *, 4> WorkList;
2501 SmallPtrSet<const User *, 4> Visited;
2502 for (const auto *CmpU : U->users()) {
2503 assert(WorkList.empty() && "Should be!");
2504 if (Visited.insert(CmpU).second)
2505 WorkList.push_back(CmpU);
2506
2507 while (!WorkList.empty()) {
2508 auto *Curr = WorkList.pop_back_val();
2509
2510 // If a user is an AND, add all its users to the work list. We only
2511 // propagate the "pred != null" condition through AND because it is only
2512 // correct to assume that all conditions of an AND are met in the true branch.
2513 // TODO: Support similar logic of OR and EQ predicate?
2514 if (NonNullIfTrue)
2515 if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
2516 for (const auto *CurrU : Curr->users())
2517 if (Visited.insert(CurrU).second)
2518 WorkList.push_back(CurrU);
2519 continue;
2520 }
2521
2522 if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
2523 assert(BI->isConditional() && "uses a comparison!");
2524
2525 BasicBlock *NonNullSuccessor =
2526 BI->getSuccessor(NonNullIfTrue ? 0 : 1);
2527 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
2528 if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
2529 return true;
2530 } else if (NonNullIfTrue && isGuard(Curr) &&
2531 DT->dominates(cast<Instruction>(Curr), CtxI)) {
2532 return true;
2533 }
2534 }
2535 }
2536 }
2537
2538 return false;
2539}
2540
2541/// Does the 'Range' metadata (which must be a valid MD_range operand list)
2542 /// ensure that the value it's attached to is never Value? 'RangeType'
2543 /// is the type of the value described by the range.
2544static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2545 const unsigned NumRanges = Ranges->getNumOperands() / 2;
2546 assert(NumRanges >= 1);
2547 for (unsigned i = 0; i < NumRanges; ++i) {
2548 ConstantInt *Lower =
2549 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
2550 ConstantInt *Upper =
2551 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
2552 ConstantRange Range(Lower->getValue(), Upper->getValue());
2553 if (Range.contains(Value))
2554 return false;
2555 }
2556 return true;
2557}
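// For illustration: !range !{i32 1, i32 100} describes the half-open range
// [1, 100), which does not contain 0, so rangeMetadataExcludesValue(Ranges,
// APInt(32, 0)) returns true and a value carrying that metadata is known to
// be non-zero.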
2558
2559/// Try to detect a recurrence that monotonically increases/decreases from a
2560/// non-zero starting value. These are common as induction variables.
2561static bool isNonZeroRecurrence(const PHINode *PN) {
2562 BinaryOperator *BO = nullptr;
2563 Value *Start = nullptr, *Step = nullptr;
2564 const APInt *StartC, *StepC;
2565 if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
2566 !match(Start, m_APInt(StartC)) || StartC->isZero())
2567 return false;
2568
2569 switch (BO->getOpcode()) {
2570 case Instruction::Add:
2571 // Starting from non-zero and stepping away from zero can never wrap back
2572 // to zero.
2573 return BO->hasNoUnsignedWrap() ||
2574 (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
2575 StartC->isNegative() == StepC->isNegative());
2576 case Instruction::Mul:
2577 return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2578 match(Step, m_APInt(StepC)) && !StepC->isZero();
2579 case Instruction::Shl:
2580 return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2581 case Instruction::AShr:
2582 case Instruction::LShr:
2583 return BO->isExact();
2584 default:
2585 return false;
2586 }
2587}
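// A hypothetical IR shape that isNonZeroRecurrence accepts:
//   %iv = phi i32 [ 8, %entry ], [ %iv.next, %loop ]
//   %iv.next = add nuw i32 %iv, 4
// The recurrence starts at a non-zero value and, because the add cannot wrap
// (nuw), it can never step back onto zero.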
2588
2589static bool matchOpWithOpEqZero(Value *Op0, Value *Op1) {
2590 return match(Op0, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
2591 m_Specific(Op1), m_Zero()))) ||
2592 match(Op1, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
2593 m_Specific(Op0), m_Zero())));
2594}
2595
2596static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
2597 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2598 Value *Y, bool NSW, bool NUW) {
2599 // (X + (X != 0)) is non zero
2600 if (matchOpWithOpEqZero(X, Y))
2601 return true;
2602
2603 if (NUW)
2604 return isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2605 isKnownNonZero(X, DemandedElts, Q, Depth);
2606
2607 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2608 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2609
2610 // If X and Y are both non-negative (as signed values) then their sum is not
2611 // zero unless both X and Y are zero.
2612 if (XKnown.isNonNegative() && YKnown.isNonNegative())
2613 if (isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2614 isKnownNonZero(X, DemandedElts, Q, Depth))
2615 return true;
2616
2617 // If X and Y are both negative (as signed values) then their sum is not
2618 // zero unless both X and Y equal INT_MIN.
2619 if (XKnown.isNegative() && YKnown.isNegative()) {
2620 APInt Mask = APInt::getSignedMaxValue(BitWidth);
2621 // The sign bit of X is set. If some other bit is set then X is not equal
2622 // to INT_MIN.
2623 if (XKnown.One.intersects(Mask))
2624 return true;
2625 // The sign bit of Y is set. If some other bit is set then Y is not equal
2626 // to INT_MIN.
2627 if (YKnown.One.intersects(Mask))
2628 return true;
2629 }
2630
2631 // The sum of a non-negative number and a power of two is not zero.
2632 if (XKnown.isNonNegative() &&
2633 isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
2634 return true;
2635 if (YKnown.isNonNegative() &&
2636 isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
2637 return true;
2638
2639 return KnownBits::add(XKnown, YKnown, NSW, NUW).isNonZero();
2640}
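// Worked example of the non-negative case above: if X and Y both have a clear
// sign bit, X + Y cannot wrap in an unsigned sense, so the sum is zero only
// when both operands are zero; knowing either operand is non-zero therefore
// makes the sum non-zero.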
2641
2642static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
2643 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2644 Value *Y) {
2645 // (X - (X != 0)) is non zero
2646 // ((X != 0) - X) is non zero
2647 if (matchOpWithOpEqZero(X, Y))
2648 return true;
2649
2650 // TODO: Move this case into isKnownNonEqual().
2651 if (auto *C = dyn_cast<Constant>(X))
2652 if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Q, Depth))
2653 return true;
2654
2655 return ::isKnownNonEqual(X, Y, DemandedElts, Depth, Q);
2656}
2657
2658static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
2659 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2660 Value *Y, bool NSW, bool NUW) {
2661 // If X and Y are non-zero then so is X * Y as long as the multiplication
2662 // does not overflow.
2663 if (NSW || NUW)
2664 return isKnownNonZero(X, DemandedElts, Q, Depth) &&
2665 isKnownNonZero(Y, DemandedElts, Q, Depth);
2666
2667 // If either X or Y is odd, then if the other is non-zero the result can't
2668 // be zero.
2669 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2670 if (XKnown.One[0])
2671 return isKnownNonZero(Y, DemandedElts, Q, Depth);
2672
2673 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2674 if (YKnown.One[0])
2675 return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth);
2676
2677 // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
2678 // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
2679 // the lowest known One of X and Y. If they are non-zero, the result
2680 // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by checking
2681 // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
2682 return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
2683 BitWidth;
2684}
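// Worked example of the trailing-zero check above: with i8 operands, if the
// lowest bit known to be one in X is bit 3 and in Y is bit 4, then their
// countMaxTrailingZeros() values are 3 and 4; 3 + 4 < 8, so the product keeps
// a set bit at position 7 or below and is therefore non-zero.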
2685
2686static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
2687 unsigned Depth, const SimplifyQuery &Q,
2688 const KnownBits &KnownVal) {
2689 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2690 switch (I->getOpcode()) {
2691 case Instruction::Shl:
2692 return Lhs.shl(Rhs);
2693 case Instruction::LShr:
2694 return Lhs.lshr(Rhs);
2695 case Instruction::AShr:
2696 return Lhs.ashr(Rhs);
2697 default:
2698 llvm_unreachable("Unknown Shift Opcode");
2699 }
2700 };
2701
2702 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2703 switch (I->getOpcode()) {
2704 case Instruction::Shl:
2705 return Lhs.lshr(Rhs);
2706 case Instruction::LShr:
2707 case Instruction::AShr:
2708 return Lhs.shl(Rhs);
2709 default:
2710 llvm_unreachable("Unknown Shift Opcode");
2711 }
2712 };
2713
2714 if (KnownVal.isUnknown())
2715 return false;
2716
2717 KnownBits KnownCnt =
2718 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2719 APInt MaxShift = KnownCnt.getMaxValue();
2720 unsigned NumBits = KnownVal.getBitWidth();
2721 if (MaxShift.uge(NumBits))
2722 return false;
2723
2724 if (!ShiftOp(KnownVal.One, MaxShift).isZero())
2725 return true;
2726
2727 // If all of the bits shifted out are known to be zero, and Val is known
2728 // non-zero then at least one non-zero bit must remain.
2729 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
2730 .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
2731 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth))
2732 return true;
2733
2734 return false;
2735}
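// For illustration: for `shl i8 %x, %c` where %x is known non-zero with its
// top three bits known zero and %c is known to be at most 3, every bit that
// could be shifted out is already a known zero, so some set bit survives the
// shift and the result stays non-zero.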
2736
2737 static bool isKnownNonZeroFromOperator(const Operator *I,
2738 const APInt &DemandedElts,
2739 unsigned Depth, const SimplifyQuery &Q) {
2740 unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
2741 switch (I->getOpcode()) {
2742 case Instruction::Alloca:
2743 // Alloca never returns null, malloc might.
2744 return I->getType()->getPointerAddressSpace() == 0;
2745 case Instruction::GetElementPtr:
2746 if (I->getType()->isPointerTy())
2747 return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q);
2748 break;
2749 case Instruction::BitCast: {
2750 // We need to be a bit careful here. We can only peek through the bitcast
2751 // if the scalar size of the elements in the operand is smaller than, and
2752 // divides, the size of the elements they are being cast to. Take three cases:
2753 //
2754 // 1) Unsafe:
2755 // bitcast <2 x i16> %NonZero to <4 x i8>
2756 //
2757 // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
2758 // <4 x i8> requires that all 4 i8 elements be non-zero which isn't
2759 // guaranteed (imagine just the sign bit set in the 2 i16 elements).
2760 //
2761 // 2) Unsafe:
2762 // bitcast <4 x i3> %NonZero to <3 x i4>
2763 //
2764 // Even though the scalar size of the src (`i3`) is smaller than the
2765 // scalar size of the dst `i4`, because `i4` is not a multiple of `i3`
2766 // it's possible for the `3 x i4` elements to be zero because there are
2767 // some elements in the destination that don't contain any full src
2768 // element.
2769 //
2770 // 3) Safe:
2771 // bitcast <4 x i8> %NonZero to <2 x i16>
2772 //
2773 // This is always safe as non-zero in the 4 i8 elements implies
2774 // non-zero in the combination of any two adjacent ones. Since i16 is a
2775 // multiple of i8, each i16 is guaranteed to contain 2 full i8 elements.
2776 // This all implies the 2 i16 elements are non-zero.
2777 Type *FromTy = I->getOperand(0)->getType();
2778 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
2779 (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0)
2780 return isKnownNonZero(I->getOperand(0), Q, Depth);
2781 } break;
2782 case Instruction::IntToPtr:
2783 // Note that we have to take special care to avoid looking through
2784 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
2785 // as casts that can alter the value, e.g., AddrSpaceCasts.
2786 if (!isa<ScalableVectorType>(I->getType()) &&
2787 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2788 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2789 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2790 break;
2791 case Instruction::PtrToInt:
2792 // Similar to int2ptr above, we can look through ptr2int here if the cast
2793 // is a no-op or an extend and not a truncate.
2794 if (!isa<ScalableVectorType>(I->getType()) &&
2795 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2796 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2797 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2798 break;
2799 case Instruction::Trunc:
2800 // nuw/nsw trunc preserves zero/non-zero status of input.
2801 if (auto *TI = dyn_cast<TruncInst>(I))
2802 if (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap())
2803 return isKnownNonZero(TI->getOperand(0), DemandedElts, Q, Depth);
2804 break;
2805
2806 case Instruction::Sub:
2807 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2808 I->getOperand(1));
2809 case Instruction::Xor:
2810 // (X ^ (X != 0)) is non zero
2811 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2812 return true;
2813 break;
2814 case Instruction::Or:
2815 // (X | (X != 0)) is non zero
2816 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2817 return true;
2818 // X | Y != 0 if X != 0 or Y != 0.
2819 return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) ||
2820 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2821 case Instruction::SExt:
2822 case Instruction::ZExt:
2823 // ext X != 0 if X != 0.
2824 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2825
2826 case Instruction::Shl: {
2827 // shl nsw/nuw can't remove any non-zero bits.
2828 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
2829 if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO))
2830 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2831
2832 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
2833 // if the lowest bit is shifted off the end.
2834 KnownBits Known(BitWidth);
2835 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q);
2836 if (Known.One[0])
2837 return true;
2838
2839 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
2840 }
2841 case Instruction::LShr:
2842 case Instruction::AShr: {
2843 // shr exact can only shift out zero bits.
2844 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
2845 if (BO->isExact())
2846 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2847
2848 // shr X, Y != 0 if X is negative. Note that the value of the shift is not
2849 // defined if the sign bit is shifted off the end.
2850 KnownBits Known =
2851 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2852 if (Known.isNegative())
2853 return true;
2854
2855 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
2856 }
2857 case Instruction::UDiv:
2858 case Instruction::SDiv: {
2859 // X / Y
2860 // div exact can only produce a zero if the dividend is zero.
2861 if (cast<PossiblyExactOperator>(I)->isExact())
2862 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2863
2864 KnownBits XKnown =
2865 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2866 // If X is fully unknown we won't be able to figure anything out so don't
2867 // bother computing knownbits for Y.
2868 if (XKnown.isUnknown())
2869 return false;
2870
2871 KnownBits YKnown =
2872 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2873 if (I->getOpcode() == Instruction::SDiv) {
2874 // For signed division need to compare abs value of the operands.
2875 XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
2876 YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
2877 }
2878 // If X u>= Y then div is non zero (0/0 is UB).
2879 std::optional<bool> XUgeY = KnownBits::uge(XKnown, YKnown);
2880 // If X is totally unknown or X u< Y, we won't be able to prove non-zero
2881 // with computeKnownBits, so just return early.
2882 return XUgeY && *XUgeY;
2883 }
2884 case Instruction::Add: {
2885 // X + Y.
2886
2887 // If Add has nuw wrap flag, then if either X or Y is non-zero the result is
2888 // non-zero.
2889 auto *BO = cast<OverflowingBinaryOperator>(I);
2890 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2891 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
2892 Q.IIQ.hasNoUnsignedWrap(BO));
2893 }
2894 case Instruction::Mul: {
2895 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
2896 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2897 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
2898 Q.IIQ.hasNoUnsignedWrap(BO));
2899 }
2900 case Instruction::Select: {
2901 // (C ? X : Y) != 0 if X != 0 and Y != 0.
2902
2903 // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
2904 // then see if the select condition implies the arm is non-zero. For example
2905 // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is
2906 // dominated by `X != 0`.
2907 auto SelectArmIsNonZero = [&](bool IsTrueArm) {
2908 Value *Op;
2909 Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2);
2910 // Op is trivially non-zero.
2911 if (isKnownNonZero(Op, DemandedElts, Q, Depth))
2912 return true;
2913
2914 // The condition of the select dominates the true/false arm. Check if the
2915 // condition implies that a given arm is non-zero.
2916 Value *X;
2917 CmpInst::Predicate Pred;
2918 if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X))))
2919 return false;
2920
2921 if (!IsTrueArm)
2922 Pred = ICmpInst::getInversePredicate(Pred);
2923
2924 return cmpExcludesZero(Pred, X);
2925 };
2926
2927 if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
2928 SelectArmIsNonZero(/* IsTrueArm */ false))
2929 return true;
2930 break;
2931 }
2932 case Instruction::PHI: {
2933 auto *PN = cast<PHINode>(I);
2934 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
2935 return true;
2936
2937 // Check if all incoming values are non-zero using recursion.
2938 SimplifyQuery RecQ = Q.getWithoutCondContext();
2939 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2940 return llvm::all_of(PN->operands(), [&](const Use &U) {
2941 if (U.get() == PN)
2942 return true;
2943 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2944 // Check if the branch on the phi excludes zero.
2945 ICmpInst::Predicate Pred;
2946 Value *X;
2947 BasicBlock *TrueSucc, *FalseSucc;
2948 if (match(RecQ.CxtI,
2949 m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)),
2950 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
2951 // Check for cases of duplicate successors.
2952 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
2953 // If we're using the false successor, invert the predicate.
2954 if (FalseSucc == PN->getParent())
2955 Pred = CmpInst::getInversePredicate(Pred);
2956 if (cmpExcludesZero(Pred, X))
2957 return true;
2958 }
2959 }
2960 // Finally recurse on the edge and check it directly.
2961 return isKnownNonZero(U.get(), DemandedElts, RecQ, NewDepth);
2962 });
2963 }
2964 case Instruction::InsertElement: {
2965 if (isa<ScalableVectorType>(I->getType()))
2966 break;
2967
2968 const Value *Vec = I->getOperand(0);
2969 const Value *Elt = I->getOperand(1);
2970 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
2971
2972 unsigned NumElts = DemandedElts.getBitWidth();
2973 APInt DemandedVecElts = DemandedElts;
2974 bool SkipElt = false;
2975 // If we know the index we are inserting to, clear that bit from the Vec check.
2976 if (CIdx && CIdx->getValue().ult(NumElts)) {
2977 DemandedVecElts.clearBit(CIdx->getZExtValue());
2978 SkipElt = !DemandedElts[CIdx->getZExtValue()];
2979 }
2980
2981 // Result is non-zero if Elt is non-zero (when demanded) and the rest of the
2982 // demanded elts in Vec are non-zero.
2983 return (SkipElt || isKnownNonZero(Elt, Q, Depth)) &&
2984 (DemandedVecElts.isZero() ||
2985 isKnownNonZero(Vec, DemandedVecElts, Q, Depth));
2986 }
2987 case Instruction::ExtractElement:
2988 if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
2989 const Value *Vec = EEI->getVectorOperand();
2990 const Value *Idx = EEI->getIndexOperand();
2991 auto *CIdx = dyn_cast<ConstantInt>(Idx);
2992 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
2993 unsigned NumElts = VecTy->getNumElements();
2994 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
2995 if (CIdx && CIdx->getValue().ult(NumElts))
2996 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
2997 return isKnownNonZero(Vec, DemandedVecElts, Q, Depth);
2998 }
2999 }
3000 break;
3001 case Instruction::ShuffleVector: {
3002 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
3003 if (!Shuf)
3004 break;
3005 APInt DemandedLHS, DemandedRHS;
3006 // For undef elements, we don't know anything about the common state of
3007 // the shuffle result.
3008 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
3009 break;
3010 // If demanded elements for both vecs are non-zero, the shuffle is non-zero.
3011 return (DemandedRHS.isZero() ||
3012 isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)) &&
3013 (DemandedLHS.isZero() ||
3014 isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth));
3015 }
3016 case Instruction::Freeze:
3017 return isKnownNonZero(I->getOperand(0), Q, Depth) &&
3018 isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
3019 Depth);
3020 case Instruction::Load: {
3021 auto *LI = cast<LoadInst>(I);
3022 // A load tagged with nonnull metadata, or with dereferenceable metadata when
3023 // the null pointer is undefined in its address space, is never null.
3024 if (auto *PtrT = dyn_cast<PointerType>(I->getType())) {
3025 if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) ||
3026 (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) &&
3027 !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace())))
3028 return true;
3029 } else if (MDNode *Ranges = Q.IIQ.getMetadata(LI, LLVMContext::MD_range)) {
3030 return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
3031 }
3032
3033 // No need to fall through to computeKnownBits as range metadata is already
3034 // handled in isKnownNonZero.
3035 return false;
3036 }
3037 case Instruction::ExtractValue: {
3038 const WithOverflowInst *WO;
3039 if (match(I, m_ExtractValue<0>(m_WithOverflowInst(WO)))) {
3040 switch (WO->getBinaryOp()) {
3041 default:
3042 break;
3043 case Instruction::Add:
3044 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
3045 WO->getArgOperand(0), WO->getArgOperand(1),
3046 /*NSW=*/false,
3047 /*NUW=*/false);
3048 case Instruction::Sub:
3049 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
3050 WO->getArgOperand(0), WO->getArgOperand(1));
3051 case Instruction::Mul:
3052 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth,
3053 WO->getArgOperand(0), WO->getArgOperand(1),
3054 /*NSW=*/false, /*NUW=*/false);
3055 break;
3056 }
3057 }
3058 break;
3059 }
3060 case Instruction::Call:
3061 case Instruction::Invoke: {
3062 const auto *Call = cast<CallBase>(I);
3063 if (I->getType()->isPointerTy()) {
3064 if (Call->isReturnNonNull())
3065 return true;
3066 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
3067 return isKnownNonZero(RP, Q, Depth);
3068 } else {
3069 if (MDNode *Ranges = Q.IIQ.getMetadata(Call, LLVMContext::MD_range))
3071 if (std::optional<ConstantRange> Range = Call->getRange()) {
3072 const APInt ZeroValue(Range->getBitWidth(), 0);
3073 if (!Range->contains(ZeroValue))
3074 return true;
3075 }
3076 if (const Value *RV = Call->getReturnedArgOperand())
3077 if (RV->getType() == I->getType() && isKnownNonZero(RV, Q, Depth))
3078 return true;
3079 }
3080
3081 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
3082 switch (II->getIntrinsicID()) {
3083 case Intrinsic::sshl_sat:
3084 case Intrinsic::ushl_sat:
3085 case Intrinsic::abs:
3086 case Intrinsic::bitreverse:
3087 case Intrinsic::bswap:
3088 case Intrinsic::ctpop:
3089 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3090 // NB: We don't handle usub_sat here because in any case where we can prove
3091 // it is non-zero, we will fold it to `sub nuw` in InstCombine.
3092 case Intrinsic::ssub_sat:
3093 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
3094 II->getArgOperand(0), II->getArgOperand(1));
3095 case Intrinsic::sadd_sat:
3096 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
3097 II->getArgOperand(0), II->getArgOperand(1),
3098 /*NSW=*/true, /* NUW=*/false);
3099 // Vec reverse preserves zero/non-zero status from input vec.
3100 case Intrinsic::vector_reverse:
3101 return isKnownNonZero(II->getArgOperand(0), DemandedElts.reverseBits(),
3102 Q, Depth);
3103 // or/umax/umin/smax/smin of all non-zero elements is always non-zero.
3104 case Intrinsic::vector_reduce_or:
3105 case Intrinsic::vector_reduce_umax:
3106 case Intrinsic::vector_reduce_umin:
3107 case Intrinsic::vector_reduce_smax:
3108 case Intrinsic::vector_reduce_smin:
3109 return isKnownNonZero(II->getArgOperand(0), Q, Depth);
3110 case Intrinsic::umax:
3111 case Intrinsic::uadd_sat:
3112 // umax(X, (X != 0)) is non zero
3113 // X +usat (X != 0) is non zero
3114 if (matchOpWithOpEqZero(II->getArgOperand(0), II->getArgOperand(1)))
3115 return true;
3116
3117 return isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth) ||
3118 isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3119 case Intrinsic::smax: {
3120 // If either arg is strictly positive the result is non-zero. Otherwise
3121 // the result is non-zero if both ops are non-zero.
3122 auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero,
3123 const KnownBits &OpKnown) {
3124 if (!OpNonZero.has_value())
3125 OpNonZero = OpKnown.isNonZero() ||
3126 isKnownNonZero(Op, DemandedElts, Q, Depth);
3127 return *OpNonZero;
3128 };
3129 // Avoid re-computing isKnownNonZero.
3130 std::optional<bool> Op0NonZero, Op1NonZero;
3131 KnownBits Op1Known =
3132 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3133 if (Op1Known.isNonNegative() &&
3134 IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known))
3135 return true;
3136 KnownBits Op0Known =
3137 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3138 if (Op0Known.isNonNegative() &&
3139 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known))
3140 return true;
3141 return IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known) &&
3142 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known);
3143 }
3144 case Intrinsic::smin: {
3145 // If either arg is negative the result is non-zero. Otherwise
3146 // the result is non-zero if both ops are non-zero.
3147 KnownBits Op1Known =
3148 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3149 if (Op1Known.isNegative())
3150 return true;
3151 KnownBits Op0Known =
3152 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3153 if (Op0Known.isNegative())
3154 return true;
3155
3156 if (Op1Known.isNonZero() && Op0Known.isNonZero())
3157 return true;
3158 }
3159 [[fallthrough]];
3160 case Intrinsic::umin:
3161 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth) &&
3162 isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth);
3163 case Intrinsic::cttz:
3164 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3165 .Zero[0];
3166 case Intrinsic::ctlz:
3167 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3168 .isNonNegative();
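      // For the two cases above: cttz(%x) is non-zero exactly when bit 0 of %x
      // is 0 (at least one trailing zero), and ctlz(%x) is non-zero exactly
      // when the top bit of %x is 0 (at least one leading zero).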
3169 case Intrinsic::fshr:
3170 case Intrinsic::fshl:
3171 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
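      // For example, fshl(i8 %x, i8 %x, i8 3) rotates %x left by 3; a rotate
      // discards no bits, so its result is zero exactly when %x is zero.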
3172 if (II->getArgOperand(0) == II->getArgOperand(1))
3173 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3174 break;
3175 case Intrinsic::vscale:
3176 return true;
3177 case Intrinsic::experimental_get_vector_length:
3178 return isKnownNonZero(I->getOperand(0), Q, Depth);
3179 default:
3180 break;
3181 }
3182 break;
3183 }
3184
3185 return false;
3186 }
3187 }
3188
3189 KnownBits Known(BitWidth);
3190 computeKnownBits(I, DemandedElts, Known, Depth, Q);
3191 return Known.One != 0;
3192}
3193
3194/// Return true if the given value is known to be non-zero when defined. For
3195/// vectors, return true if every demanded element is known to be non-zero when
3196/// defined. For pointers, if the context instruction and dominator tree are
3197/// specified, perform context-sensitive analysis and return true if the
3198/// pointer couldn't possibly be null at the specified instruction.
3199/// Supports values with integer or pointer type and vectors of integers.
3200bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
3201 const SimplifyQuery &Q, unsigned Depth) {
3202 Type *Ty = V->getType();
3203
3204#ifndef NDEBUG
3205 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3206
3207 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3208 assert(
3209 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3210 "DemandedElt width should equal the fixed vector number of elements");
3211 } else {
3212 assert(DemandedElts == APInt(1, 1) &&
3213 "DemandedElt width should be 1 for scalars");
3214 }
3215#endif
3216
3217 if (auto *C = dyn_cast<Constant>(V)) {
3218 if (C->isNullValue())
3219 return false;
3220 if (isa<ConstantInt>(C))
3221 // Must be non-zero due to null test above.
3222 return true;
3223
3224 // For constant vectors, check that all elements are poison or known
3225 // non-zero to determine that the whole vector is known non-zero.
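    // For example, <2 x i8> <i8 1, i8 poison> is known non-zero, while
    // <2 x i8> <i8 1, i8 0> is not.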
3226 if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
3227 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
3228 if (!DemandedElts[i])
3229 continue;
3230 Constant *Elt = C->getAggregateElement(i);
3231 if (!Elt || Elt->isNullValue())
3232 return false;
3233 if (!isa<PoisonValue>(Elt) && !isa<ConstantInt>(Elt))
3234 return false;
3235 }
3236 return true;
3237 }
3238
3239 // A constant ptrauth can be null iff the base pointer can be.
3240 if (auto *CPA = dyn_cast<ConstantPtrAuth>(V))
3241 return isKnownNonZero(CPA->getPointer(), DemandedElts, Q, Depth);
3242
3243 // A global variable in address space 0 is non-null unless it is extern weak
3244 // or an absolute symbol reference. Other address spaces may have null as a
3245 // valid address for a global, so we can't assume anything.
3246 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
3247 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3248 GV->getType()->getAddressSpace() == 0)
3249 return true;
3250 }
3251
3252 // For constant expressions, fall through to the Operator code below.
3253 if (!isa<ConstantExpr>(V))
3254 return false;
3255 }
3256
3257 if (const auto *A = dyn_cast<Argument>(V))
3258 if (std::optional<ConstantRange> Range = A->getRange()) {
3259 const APInt ZeroValue(Range->getBitWidth(), 0);
3260 if (!Range->contains(ZeroValue))
3261 return true;
3262 }
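  // For example, an argument annotated with `range(i32 1, 10)` has a range
  // that does not contain 0, so it is known non-zero.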
3263
3264 if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q))
3265 return true;
3266
3267 // Some of the tests below are recursive, so bail out if we hit the limit.
3268 if (Depth++ >= MaxAnalysisRecursionDepth)
3269 return false;
3270
3271 // Check for pointer simplifications.
3272
3273 if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) {
3274 // A byval or inalloca argument is only known non-null when null pointers are
3275 // undefined for its address space. A nonnull argument is assumed never 0.
3276 if (const Argument *A = dyn_cast<Argument>(V)) {
3277 if (((A->hasPassPointeeByValueCopyAttr() &&
3278 !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
3279 A->hasNonNullAttr()))
3280 return true;
3281 }
3282 }
3283
3284 if (const auto *I = dyn_cast<Operator>(V))
3285 if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q))
3286 return true;
3287
3288 if (!isa<Constant>(V) &&
3289 isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
3290 return true;
3291
3292 return false;
3293}
3294
3295 bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
3296 unsigned Depth) {
3297 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
3298 APInt DemandedElts =
3299 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
3300 return ::isKnownNonZero(V, DemandedElts, Q, Depth);
3301}
3302
3303 /// If the pair of operators are the same invertible function, return the
3304 /// operands of the function corresponding to each input. Otherwise,
3305 /// return std::nullopt. An invertible function is one that is 1-to-1 and maps
3306 /// every input value to exactly one output value. This is equivalent to
3307 /// saying that Op1 and Op2 are equal exactly when the specified pair of
3308 /// operands are equal (except that Op1 and Op2 may be poison more often).
3309static std::optional<std::pair<Value*, Value*>>
3310 getInvertibleOperands(const Operator *Op1,
3311 const Operator *Op2) {
3312 if (Op1->getOpcode() != Op2->getOpcode())
3313 return std::nullopt;
3314
3315 auto getOperands = [&](unsigned OpNum) -> auto {
3316 return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
3317 };
3318
3319 switch (Op1->getOpcode()) {
3320 default:
3321 break;
3322 case Instruction::Or:
3323 if (!cast<PossiblyDisjointInst>(Op1)->isDisjoint() ||
3324 !cast<PossiblyDisjointInst>(Op2)->isDisjoint())
3325 break;
3326 [[fallthrough]];
3327 case Instruction::Xor:
3328 case Instruction::Add: {
3329 Value *Other;
3330 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other))))
3331 return std::make_pair(Op1->getOperand(1), Other);
3332 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other))))
3333 return std::make_pair(Op1->getOperand(0), Other);
3334 break;
3335 }
3336 case Instruction::Sub:
3337 if (Op1->getOperand(0) == Op2->getOperand(0))
3338 return getOperands(1);
3339 if (Op1->getOperand(1) == Op2->getOperand(1))
3340 return getOperands(0);
3341 break;
3342 case Instruction::Mul: {
3343 // invertible if A * B == (A * B) mod 2^N where A and B are integers
3344 // and N is the bitwidth. The nsw case is non-obvious, but proven by
3345 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
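    // For example, if X * 6 and Y * 6 are both nsw (or both nuw) and compare
    // equal, the products cannot have wrapped, so 6 * X == 6 * Y implies
    // X == Y; it therefore suffices to compare the first operands.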
3346 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3347 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3348 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3349 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3350 break;
3351
3352 // Assume operand order has been canonicalized
3353 if (Op1->getOperand(1) == Op2->getOperand(1) &&
3354 isa<ConstantInt>(Op1->getOperand(1)) &&
3355 !cast<ConstantInt>(Op1->getOperand(1))->isZero())
3356 return getOperands(0);
3357 break;
3358 }
3359 case Instruction::Shl: {
3360 // Same as multiplies, with the difference that we don't need to check
3361 // for a non-zero multiply. Shifts always multiply by non-zero.
3362 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3363 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3364 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3365 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3366 break;
3367
3368 if (Op1->getOperand(1) == Op2->getOperand(1))
3369 return getOperands(0);
3370 break;
3371 }
3372 case Instruction::AShr:
3373 case Instruction::LShr: {
3374 auto *PEO1 = cast<PossiblyExactOperator>(Op1);
3375 auto *PEO2 = cast<PossiblyExactOperator>(Op2);
3376 if (!PEO1->isExact() || !PEO2->isExact())
3377 break;
3378
3379 if (Op1->getOperand(1) == Op2->getOperand(1))
3380 return getOperands(0);
3381 break;
3382 }
3383 case Instruction::SExt:
3384 case Instruction::ZExt:
3385 if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
3386 return getOperands(0);
3387 break;
3388 case Instruction::PHI: {
3389 const PHINode *PN1 = cast<PHINode>(Op1);
3390 const PHINode *PN2 = cast<PHINode>(Op2);
3391
3392 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
3393 // are a single invertible function of the start values? Note that repeated
3394 // application of an invertible function is also invertible
3395 BinaryOperator *BO1 = nullptr;
3396 Value *Start1 = nullptr, *Step1 = nullptr;
3397 BinaryOperator *BO2 = nullptr;
3398 Value *Start2 = nullptr, *Step2 = nullptr;
3399 if (PN1->getParent() != PN2->getParent() ||
3400 !matchSimpleRecurrence(PN1, BO1, Start1, Step1) ||
3401 !matchSimpleRecurrence(PN2, BO2, Start2, Step2))
3402 break;
3403
3404 auto Values = getInvertibleOperands(cast<Operator>(BO1),
3405 cast<Operator>(BO2));
3406 if (!Values)
3407 break;
3408
3409 // We have to be careful of mutually defined recurrences here. Ex:
3410 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
3411 // * X_i = Y_i = X_(i-1) OP Y_(i-1)
3412 // The invertibility of these is complicated, and not worth reasoning
3413 // about (yet?).
3414 if (Values->first != PN1 || Values->second != PN2)
3415 break;
3416
3417 return std::make_pair(Start1, Start2);
3418 }
3419 }
3420 return std::nullopt;
3421}
3422
3423/// Return true if V1 == (binop V2, X), where X is known non-zero.
3424/// Only handle a small subset of binops where (binop V2, X) with non-zero X
3425/// implies V2 != V1.
3426static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
3427 const APInt &DemandedElts, unsigned Depth,
3428 const SimplifyQuery &Q) {
3429 const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
3430 if (!BO)
3431 return false;
3432 switch (BO->getOpcode()) {
3433 default:
3434 break;
3435 case Instruction::Or:
3436 if (!cast<PossiblyDisjointInst>(V1)->isDisjoint())
3437 break;
3438 [[fallthrough]];
3439 case Instruction::Xor:
3440 case Instruction::Add:
3441 Value *Op = nullptr;
3442 if (V2 == BO->getOperand(0))
3443 Op = BO->getOperand(1);
3444 else if (V2 == BO->getOperand(1))
3445 Op = BO->getOperand(0);
3446 else
3447 return false;
3448 return isKnownNonZero(Op, DemandedElts, Q, Depth + 1);
3449 }
3450 return false;
3451}
3452
3453/// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
3454/// the multiplication is nuw or nsw.
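/// For example, if V1 is known non-zero and V2 == V1 *nsw 3, then V2 != V1:
/// V2 == V1 would require 2 * V1 == 0 without wrapping, i.e. V1 == 0.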
3455static bool isNonEqualMul(const Value *V1, const Value *V2,
3456 const APInt &DemandedElts, unsigned Depth,
3457 const SimplifyQuery &Q) {
3458 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3459 const APInt *C;
3460 return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
3461 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3462 !C->isZero() && !C->isOne() &&
3463 isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
3464 }
3465 return false;
3466}
3467
3468/// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
3469/// the shift is nuw or nsw.
3470static bool isNonEqualShl(const Value *V1, const Value *V2,
3471 const APInt &DemandedElts, unsigned Depth,
3472 const SimplifyQuery &Q) {
3473 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3474 const APInt *C;
3475 return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
3476 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3477 !C->isZero() && isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
3478 }
3479 return false;
3480}
3481
3482static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
3483 const APInt &DemandedElts, unsigned Depth,
3484 const SimplifyQuery &Q) {
3485 // Check that the two PHIs are in the same block.
3486 if (PN1->getParent() != PN2->getParent())
3487 return false;
3488
3489 SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
3490 bool UsedFullRecursion = false;
3491 for (const BasicBlock *IncomBB : PN1->blocks()) {
3492 if (!VisitedBBs.insert(IncomBB).second)
3493 continue; // Don't reprocess blocks that we have dealt with already.
3494 const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
3495 const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
3496 const APInt *C1, *C2;
3497 if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2)
3498 continue;
3499
3500 // Only one pair of phi operands is allowed for full recursion.
3501 if (UsedFullRecursion)
3502 return false;
3503
3504 SimplifyQuery RecQ = Q.getWithoutCondContext();
3505 RecQ.CxtI = IncomBB->getTerminator();
3506 if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ))
3507 return false;
3508 UsedFullRecursion = true;
3509 }
3510 return true;
3511}
3512
3513static bool isNonEqualSelect(const Value *V1, const Value *V2,
3514 const APInt &DemandedElts, unsigned Depth,
3515 const SimplifyQuery &Q) {
3516 const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
3517 if (!SI1)
3518 return false;
3519
3520 if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
3521 const Value *Cond1 = SI1->getCondition();
3522 const Value *Cond2 = SI2->getCondition();
3523 if (Cond1 == Cond2)
3524 return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
3525 DemandedElts, Depth + 1, Q) &&
3526 isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
3527 DemandedElts, Depth + 1, Q);
3528 }
3529 return isKnownNonEqual(SI1->getTrueValue(), V2, DemandedElts, Depth + 1, Q) &&
3530 isKnownNonEqual(SI1->getFalseValue(), V2, DemandedElts, Depth + 1, Q);
3531}
3532
3533 // Check whether A is both a GEP and the incoming value for a PHI in a loop,
3534 // and B is either a plain pointer or another GEP. If the PHI has two incoming
3535 // values, one of them being the recursive GEP A and the other a pointer with
3536 // the same base as B at the same or a higher offset, then A can never equal B
3537 // as long as the recursive GEP's step offset is greater than 0.
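// For instance, with %p = phi [ %base, %entry ], [ %p.next, %loop ] and
// %p.next = getelementptr inbounds i8, ptr %p, i64 1, the start value is %base
// (offset 0) and the step offset is +1, so %p.next can never equal %base.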
3538 static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
3539 const SimplifyQuery &Q) {
3540 if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
3541 return false;
3542
3543 auto *GEPA = dyn_cast<GEPOperator>(A);
3544 if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin()))
3545 return false;
3546
3547 // Handle 2 incoming PHI values with one being a recursive GEP.
3548 auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
3549 if (!PN || PN->getNumIncomingValues() != 2)
3550 return false;
3551
3552 // Search for the recursive GEP as an incoming operand, and record that as
3553 // Step.
3554 Value *Start = nullptr;
3555 Value *Step = const_cast<Value *>(A);
3556 if (PN->getIncomingValue(0) == Step)
3557 Start = PN->getIncomingValue(1);
3558 else if (PN->getIncomingValue(1) == Step)
3559 Start = PN->getIncomingValue(0);
3560 else
3561 return false;
3562
3563 // The base of the other incoming value should match the base of B.
3564 // A and B are then known non-equal if either
3565 //   StartOffset >= OffsetB && StepOffset > 0, or
3566 //   StartOffset <= OffsetB && StepOffset < 0.
3567 // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
3568 // optimization to inbounds GEPs only.
3569 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
3570 APInt StartOffset(IndexWidth, 0);
3571 Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
3572 APInt StepOffset(IndexWidth, 0);
3573 Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);
3574
3575 // Check if Base Pointer of Step matches the PHI.
3576 if (Step != PN)
3577 return false;
3578 APInt OffsetB(IndexWidth, 0);
3579 B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
3580 return Start == B &&
3581 ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) ||
3582 (StartOffset.sle(OffsetB) && StepOffset.isNegative()));
3583}
3584
3585/// Return true if it is known that V1 != V2.
3586static bool isKnownNonEqual(const Value *V1, const Value *V2,
3587 const APInt &DemandedElts, unsigned Depth,
3588 const SimplifyQuery &Q) {
3589 if (V1 == V2)
3590 return false;
3591 if (V1->getType() != V2->getType())
3592 // We can't look through casts yet.
3593 return false;
3594
3595 if (Depth >= MaxAnalysisRecursionDepth)
3596 return false;
3597
3598 // See if we can recurse through (exactly one of) our operands. This
3599 // requires our operation be 1-to-1 and map every input value to exactly
3600 // one output value. Such an operation is invertible.
3601 auto *O1 = dyn_cast<Operator>(V1);
3602 auto *O2 = dyn_cast<Operator>(V2);
3603 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
3604 if (auto Values = getInvertibleOperands(O1, O2))
3605 return isKnownNonEqual(Values->first, Values->second, DemandedElts,
3606 Depth + 1, Q);
3607
3608 if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
3609 const PHINode *PN2 = cast<PHINode>(V2);
3610 // FIXME: This is missing a generalization to handle the case where one is
3611 // a PHI and another one isn't.
3612 if (isNonEqualPHIs(PN1, PN2, DemandedElts, Depth, Q))
3613 return true;
3614 };
3615 }
3616
3617 if (isModifyingBinopOfNonZero(V1, V2, DemandedElts, Depth, Q) ||
3618 isModifyingBinopOfNonZero(V2, V1, DemandedElts, Depth, Q))
3619 return true;
3620
3621 if (isNonEqualMul(V1, V2, DemandedElts, Depth, Q) ||
3622 isNonEqualMul(V2, V1, DemandedElts, Depth, Q))
3623 return true;
3624
3625 if (isNonEqualShl(V1, V2, DemandedElts, Depth, Q) ||
3626 isNonEqualShl(V2, V1, DemandedElts, Depth, Q))
3627 return true;
3628
3629 if (V1->getType()->isIntOrIntVectorTy()) {
3630 // Are any known bits in V1 contradictory to known bits in V2? If V1
3631 // has a known zero where V2 has a known one, they must not be equal.
3632 KnownBits Known1 = computeKnownBits(V1, DemandedElts, Depth, Q);
3633 if (!Known1.isUnknown()) {
3634 KnownBits Known2 = computeKnownBits(V2, DemandedElts, Depth, Q);
3635 if (Known1.Zero.intersects(Known2.One) ||
3636 Known2.Zero.intersects(Known1.One))
3637 return true;
3638 }
3639 }
3640
3641 if (isNonEqualSelect(V1, V2, DemandedElts, Depth, Q) ||
3642 isNonEqualSelect(V2, V1, DemandedElts, Depth, Q))
3643 return true;
3644
3645 if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) ||
3646 isNonEqualPointersWithRecursiveGEP(V2, V1, Q))
3647 return true;
3648
3649 Value *A, *B;
3650 // PtrToInts are NonEqual if their Ptrs are NonEqual.
3651 // Check PtrToInt type matches the pointer size.
3652 if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) &&
3653 match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B))))
3654 return isKnownNonEqual(A, B, DemandedElts, Depth + 1, Q);
3655
3656 return false;
3657}
3658
3659// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
3660// Returns the input and lower/upper bounds.
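// For example, smax(smin(%x, 7), -8) clamps %x to [-8, 7]; here In = %x,
// CLow = -8 and CHigh = 7.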
3661static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
3662 const APInt *&CLow, const APInt *&CHigh) {
3663 assert(isa<Operator>(Select) &&
3664 cast<Operator>(Select)->getOpcode() == Instruction::Select &&
3665 "Input should be a Select!");
3666
3667 const Value *LHS = nullptr, *RHS = nullptr;
3668 SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
3669 if (SPF != SPF_SMAX && SPF != SPF_SMIN)
3670 return false;
3671
3672 if (!match(RHS, m_APInt(CLow)))
3673 return false;
3674
3675 const Value *LHS2 = nullptr, *RHS2 = nullptr;
3676 SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
3677 if (getInverseMinMaxFlavor(SPF) != SPF2)
3678 return false;
3679
3680 if (!match(RHS2, m_APInt(CHigh)))
3681 return false;
3682
3683 if (SPF == SPF_SMIN)
3684 std::swap(CLow, CHigh);
3685
3686 In = LHS2;
3687 return CLow->sle(*CHigh);
3688}
3689
3690 static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
3691 const APInt *&CLow,
3692 const APInt *&CHigh) {
3693 assert((II->getIntrinsicID() == Intrinsic::smin ||
3694 II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax");
3695
3696 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
3697 auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
3698 if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
3699 !match(II->getArgOperand(1), m_APInt(CLow)) ||
3700 !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
3701 return false;
3702
3703 if (II->getIntrinsicID() == Intrinsic::smin)
3704 std::swap(CLow, CHigh);
3705 return CLow->sle(*CHigh);
3706}
3707
3708/// For vector constants, loop over the elements and find the constant with the
3709/// minimum number of sign bits. Return 0 if the value is not a vector constant
3710/// or if any element was not analyzed; otherwise, return the count for the
3711/// element with the minimum number of sign bits.
3712 static unsigned computeNumSignBitsVectorConstant(const Value *V,
3713 const APInt &DemandedElts,
3714 unsigned TyBits) {
3715 const auto *CV = dyn_cast<Constant>(V);
3716 if (!CV || !isa<FixedVectorType>(CV->getType()))
3717 return 0;
3718
3719 unsigned MinSignBits = TyBits;
3720 unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements();
3721 for (unsigned i = 0; i != NumElts; ++i) {
3722 if (!DemandedElts[i])
3723 continue;
3724 // If we find a non-ConstantInt, bail out.
3725 auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
3726 if (!Elt)
3727 return 0;
3728
3729 MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
3730 }
3731
3732 return MinSignBits;
3733}
3734
3735static unsigned ComputeNumSignBitsImpl(const Value *V,
3736 const APInt &DemandedElts,
3737 unsigned Depth, const SimplifyQuery &Q);
3738
3739static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3740 unsigned Depth, const SimplifyQuery &Q) {
3741 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
3742 assert(Result > 0 && "At least one sign bit needs to be present!");
3743 return Result;
3744}
3745
3746/// Return the number of times the sign bit of the register is replicated into
3747/// the other bits. We know that at least 1 bit is always equal to the sign bit
3748/// (itself), but other cases can give us information. For example, immediately
3749/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
3750/// other, so we return 3. For vectors, return the number of sign bits for the
3751/// vector element with the minimum number of known sign bits of the demanded
3752/// elements in the vector specified by DemandedElts.
3753static unsigned ComputeNumSignBitsImpl(const Value *V,
3754 const APInt &DemandedElts,
3755 unsigned Depth, const SimplifyQuery &Q) {
3756 Type *Ty = V->getType();
3757#ifndef NDEBUG
3758 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3759
3760 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3761 assert(
3762 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3763 "DemandedElt width should equal the fixed vector number of elements");
3764 } else {
3765 assert(DemandedElts == APInt(1, 1) &&
3766 "DemandedElt width should be 1 for scalars");
3767 }
3768#endif
3769
3770 // We return the minimum number of sign bits that are guaranteed to be present
3771 // in V, so for undef we have to conservatively return 1. We don't have the
3772 // same behavior for poison though -- that's a FIXME today.
3773
3774 Type *ScalarTy = Ty->getScalarType();
3775 unsigned TyBits = ScalarTy->isPointerTy() ?
3776 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
3777 Q.DL.getTypeSizeInBits(ScalarTy);
3778
3779 unsigned Tmp, Tmp2;
3780 unsigned FirstAnswer = 1;
3781
3782 // Note that ConstantInt is handled by the general computeKnownBits case
3783 // below.
3784
3785 if (Depth == MaxAnalysisRecursionDepth)
3786 return 1;
3787
3788 if (auto *U = dyn_cast<Operator>(V)) {
3789 switch (Operator::getOpcode(V)) {
3790 default: break;
3791 case Instruction::SExt:
3792 Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
3793 return ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q) +
3794 Tmp;
3795
3796 case Instruction::SDiv: {
3797 const APInt *Denominator;
3798 // sdiv X, C -> adds log(C) sign bits.
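      // For example, in i8, if X has at least 2 sign bits (X is in [-64, 63]),
      // then X sdiv 8 lies in [-8, 7] and has at least 2 + log2(8) = 5 sign
      // bits.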
3799 if (match(U->getOperand(1), m_APInt(Denominator))) {
3800
3801 // Ignore non-positive denominator.
3802 if (!Denominator->isStrictlyPositive())
3803 break;
3804
3805 // Calculate the incoming numerator bits.
3806 unsigned NumBits =
3807 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3808
3809 // Add floor(log(C)) bits to the numerator bits.
3810 return std::min(TyBits, NumBits + Denominator->logBase2());
3811 }
3812 break;
3813 }
3814
3815 case Instruction::SRem: {
3816 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3817
3818 const APInt *Denominator;
3819 // srem X, C -> we know that the result is within [-C+1,C) when C is a
3820 // positive constant. This lets us put a lower bound on the number of sign
3821 // bits.
3822 if (match(U->getOperand(1), m_APInt(Denominator))) {
3823
3824 // Ignore non-positive denominator.
3825 if (Denominator->isStrictlyPositive()) {
3826 // Calculate the leading sign bit constraints by examining the
3827 // denominator. Given that the denominator is positive, there are two
3828 // cases:
3829 //
3830 // 1. The numerator is positive. The result range is [0,C) and
3831 // [0,C) u< (1 << ceilLogBase2(C)).
3832 //
3833 // 2. The numerator is negative. Then the result range is (-C,0] and
3834 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
3835 //
3836 // Thus a lower bound on the number of sign bits is `TyBits -
3837 // ceilLogBase2(C)`.
3838
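        // For example, for i8 `srem X, 24`: ceilLogBase2(24) = 5, so the
        // result always has at least 8 - 5 = 3 sign bits.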
3839 unsigned ResBits = TyBits - Denominator->ceilLogBase2();
3840 Tmp = std::max(Tmp, ResBits);
3841 }
3842 }
3843 return Tmp;
3844 }
3845
3846 case Instruction::AShr: {
3847 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3848 // ashr X, C -> adds C sign bits. Vectors too.
3849 const APInt *ShAmt;
3850 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3851 if (ShAmt->uge(TyBits))
3852 break; // Bad shift.
3853 unsigned ShAmtLimited = ShAmt->getZExtValue();
3854 Tmp += ShAmtLimited;
3855 if (Tmp > TyBits) Tmp = TyBits;
3856 }
3857 return Tmp;
3858 }
3859 case Instruction::Shl: {
3860 const APInt *ShAmt;
3861 Value *X = nullptr;
3862 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3863 // shl destroys sign bits.
3864 if (ShAmt->uge(TyBits))
3865 break; // Bad shift.
3866 // We can look through a zext (more or less treating it as a sext) if
3867 // all extended bits are shifted out.
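      // For example, shl (zext i8 %x to i32), 24 shifts out all 24 extended
      // bits, so the result has at least as many sign bits as %x has in i8.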
3868 if (match(U->getOperand(0), m_ZExt(m_Value(X))) &&
3869 ShAmt->uge(TyBits - X->getType()->getScalarSizeInBits())) {
3870 Tmp = ComputeNumSignBits(X, DemandedElts, Depth + 1, Q);
3871 Tmp += TyBits - X->getType()->getScalarSizeInBits();
3872 } else
3873 Tmp =
3874 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3875 if (ShAmt->uge(Tmp))
3876 break; // Shifted all sign bits out.
3877 Tmp2 = ShAmt->getZExtValue();
3878 return Tmp - Tmp2;
3879 }
3880 break;
3881 }
3882 case Instruction::And:
3883 case Instruction::Or:
3884 case Instruction::Xor: // NOT is handled here.
3885 // Logical binary ops preserve the number of sign bits at the worst.
3886 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3887 if (Tmp != 1) {
3888 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
3889 FirstAnswer = std::min(Tmp, Tmp2);
3890 // We computed what we know about the sign bits as our first
3891 // answer. Now proceed to the generic code that uses
3892 // computeKnownBits, and pick whichever answer is better.
3893 }
3894 break;
3895
3896 case Instruction::Select: {
3897 // If we have a clamp pattern, we know that the number of sign bits will
3898 // be the minimum of the clamp min/max range.
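      // For example, a select chain clamping %x to [-8, 7] in i32 has at least
      // min(signbits(-8), signbits(7)) = 29 sign bits, since the result always
      // fits in 4 bits.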
3899 const Value *X;
3900 const APInt *CLow, *CHigh;
3901 if (isSignedMinMaxClamp(U, X, CLow, CHigh))
3902 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
3903
3904 Tmp = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
3905 if (Tmp == 1)
3906 break;
3907 Tmp2 = ComputeNumSignBits(U->getOperand(2), DemandedElts, Depth + 1, Q);
3908 return std::min(Tmp, Tmp2);
3909 }
3910
3911 case Instruction::Add:
3912 // Add can have at most one carry bit. Thus we know that the output
3913 // is, at worst, one more bit than the inputs.
3914 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3915 if (Tmp == 1) break;
3916
3917 // Special case decrementing a value (ADD X, -1):
3918 if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
3919 if (CRHS->isAllOnesValue()) {
3920 KnownBits Known(TyBits);
3921 computeKnownBits(U->getOperand(0), DemandedElts, Known, Depth + 1, Q);
3922
3923 // If the input is known to be 0 or 1, the output is 0/-1, which is
3924 // all sign bits set.
3925 if ((Known.Zero | 1).isAllOnes())
3926 return TyBits;
3927
3928 // If we are subtracting one from a positive number, there is no carry
3929 // out of the result.
3930 if (Known.isNonNegative())
3931 return Tmp;
3932 }
3933
3934 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
3935 if (Tmp2 == 1)
3936 break;
3937 return std::min(Tmp, Tmp2) - 1;
3938
3939 case Instruction::Sub:
3940 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
3941 if (Tmp2 == 1)
3942 break;
3943
3944 // Handle NEG.
3945 if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
3946 if (CLHS->isNullValue()) {
3947 KnownBits Known(TyBits);
3948 computeKnownBits(U->getOperand(1), DemandedElts, Known, Depth + 1, Q);
3949 // If the input is known to be 0 or 1, the output is 0/-1, which is
3950 // all sign bits set.
3951 if ((Known.Zero | 1).isAllOnes())
3952 return TyBits;
3953
3954 // If the input is known to be positive (the sign bit is known clear),
3955 // the output of the NEG has the same number of sign bits as the
3956 // input.
3957 if (Known.isNonNegative())
3958 return Tmp2;
3959
3960 // Otherwise, we treat this like a SUB.
3961 }
3962
3963 // Sub can have at most one carry bit. Thus we know that the output
3964 // is, at worst, one more bit than the inputs.
3965 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3966 if (Tmp == 1)
3967 break;
3968 return std::min(Tmp, Tmp2) - 1;
3969
3970 case Instruction::Mul: {
3971 // The output of the Mul can be at most twice the valid bits in the
3972 // inputs.
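      // For example, in i8, operands with 6 sign bits each lie in [-4, 3];
      // their product lies in [-12, 16] and needs at most 6 value bits, so at
      // least 8 - 6 + 1 = 3 sign bits remain.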
3973 unsigned SignBitsOp0 =
3974 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3975 if (SignBitsOp0 == 1)
3976 break;
3977 unsigned SignBitsOp1 =
3978 ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
3979 if (SignBitsOp1 == 1)
3980 break;
3981 unsigned OutValidBits =
3982 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
3983 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
3984 }
3985
3986 case Instruction::PHI: {
3987 const PHINode *PN = cast<PHINode>(U);
3988 unsigned NumIncomingValues = PN->getNumIncomingValues();
3989 // Don't analyze large in-degree PHIs.
3990 if (NumIncomingValues > 4) break;
3991 // Unreachable blocks may have zero-operand PHI nodes.
3992 if (NumIncomingValues == 0) break;
3993
3994 // Take the minimum of all incoming values. This can't infinitely loop
3995 // because of our depth threshold.
3996 SimplifyQuery RecQ = Q.getWithoutCondContext();
3997 Tmp = TyBits;
3998 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
3999 if (Tmp == 1) return Tmp;
4000 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
4001 Tmp = std::min(Tmp, ComputeNumSignBits(PN->getIncomingValue(i),
4002 DemandedElts, Depth + 1, RecQ));
4003 }
4004 return Tmp;
4005 }
4006
4007 case Instruction::Trunc: {
4008 // If the input contained enough sign bits that some remain after the
4009 // truncation, then we can make use of that. Otherwise we don't know
4010 // anything.
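      // For example, trunc i32 %x to i16 where %x has 20 sign bits leaves
      // 20 - (32 - 16) = 4 sign bits in the i16 result.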
4011 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4012 unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits();
4013 if (Tmp > (OperandTyBits - TyBits))
4014 return Tmp - (OperandTyBits - TyBits);
4015
4016 return 1;
4017 }
4018
4019 case Instruction::ExtractElement:
4020 // Look through extract element. At the moment we keep this simple and
4021 // skip tracking the specific element. But at least we might find
4022 // information valid for all elements of the vector (for example if vector
4023 // is sign extended, shifted, etc).
4024 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4025
4026 case Instruction::ShuffleVector: {
4027 // Collect the minimum number of sign bits that are shared by every vector
4028 // element referenced by the shuffle.
4029 auto *Shuf = dyn_cast<ShuffleVectorInst>(U);
4030 if (!Shuf) {
4031 // FIXME: Add support for shufflevector constant expressions.
4032 return 1;
4033 }
4034 APInt DemandedLHS, DemandedRHS;
4035 // For undef elements, we don't know anything about the common state of
4036 // the shuffle result.
4037 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
4038 return 1;
4039 Tmp = std::numeric_limits<unsigned>::max();
4040 if (!!DemandedLHS) {
4041 const Value *LHS = Shuf->getOperand(0);
4042 Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q);
4043 }
4044 // If we don't know anything, early out and try computeKnownBits
4045 // fall-back.
4046 if (Tmp == 1)
4047 break;
4048 if (!!DemandedRHS) {
4049 const Value *RHS = Shuf->getOperand(1);
4050 Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q);
4051 Tmp = std::min(Tmp, Tmp2);
4052 }
4053 // If we don't know anything, early out and try computeKnownBits
4054 // fall-back.
4055 if (Tmp == 1)
4056 break;
4057 assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
4058 return Tmp;
4059 }
4060 case Instruction::Call: {
4061 if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
4062 switch (II->getIntrinsicID()) {
4063 default:
4064 break;
4065 case Intrinsic::abs:
4066 Tmp =
4067 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4068 if (Tmp == 1)
4069 break;
4070
4071 // Absolute value reduces number of sign bits by at most 1.
4072 return Tmp - 1;
4073 case Intrinsic::smin:
4074 case Intrinsic::smax: {
4075 const APInt *CLow, *CHigh;
4076 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
4077 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
4078 }
4079 }
4080 }
4081 }
4082 }
4083 }
4084
4085 // Finally, if we can prove that the top bits of the result are 0's or 1's,
4086 // use this information.
4087
4088 // If we can examine all elements of a vector constant successfully, we're
4089 // done (we can't do any better than that). If not, keep trying.
4090 if (unsigned VecSignBits =
4091 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
4092 return VecSignBits;
4093
4094 KnownBits Known(TyBits);
4095 computeKnownBits(V, DemandedElts, Known, Depth, Q);
4096
4097 // If we know that the sign bit is either zero or one, determine the number of
4098 // identical bits in the top of the input value.
4099 return std::max(FirstAnswer, Known.countMinSignBits());
4100}
4101
4102 Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
4103 const TargetLibraryInfo *TLI) {
4104 const Function *F = CB.getCalledFunction();
4105 if (!F)
4106 return Intrinsic::not_intrinsic;
4107 
4108 if (F->isIntrinsic())
4109 return F->getIntrinsicID();
4110
4111 // We are going to infer semantics of a library function based on mapping it
4112 // to an LLVM intrinsic. Check that the library function is available from
4113 // this callbase and in this environment.
4114 LibFunc Func;
4115 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) ||
4116 !CB.onlyReadsMemory())
4117 return Intrinsic::not_intrinsic;
4118 
4119 switch (Func) {
4120 default:
4121 break;
4122 case LibFunc_sin:
4123 case LibFunc_sinf:
4124 case LibFunc_sinl:
4125 return Intrinsic::sin;
4126 case LibFunc_cos:
4127 case LibFunc_cosf:
4128 case LibFunc_cosl:
4129 return Intrinsic::cos;
4130 case LibFunc_tan:
4131 case LibFunc_tanf:
4132 case LibFunc_tanl:
4133 return Intrinsic::tan;
4134 case LibFunc_exp:
4135 case LibFunc_expf:
4136 case LibFunc_expl:
4137 return Intrinsic::exp;
4138 case LibFunc_exp2:
4139 case LibFunc_exp2f:
4140 case LibFunc_exp2l:
4141 return Intrinsic::exp2;
4142 case LibFunc_log:
4143 case LibFunc_logf:
4144 case LibFunc_logl:
4145 return Intrinsic::log;
4146 case LibFunc_log10:
4147 case LibFunc_log10f:
4148 case LibFunc_log10l:
4149 return Intrinsic::log10;
4150 case LibFunc_log2:
4151 case LibFunc_log2f:
4152 case LibFunc_log2l:
4153 return Intrinsic::log2;
4154 case LibFunc_fabs:
4155 case LibFunc_fabsf:
4156 case LibFunc_fabsl:
4157 return Intrinsic::fabs;
4158 case LibFunc_fmin:
4159 case LibFunc_fminf:
4160 case LibFunc_fminl:
4161 return Intrinsic::minnum;
4162 case LibFunc_fmax:
4163 case LibFunc_fmaxf:
4164 case LibFunc_fmaxl:
4165 return Intrinsic::maxnum;
4166 case LibFunc_copysign:
4167 case LibFunc_copysignf:
4168 case LibFunc_copysignl:
4169 return Intrinsic::copysign;
4170 case LibFunc_floor:
4171 case LibFunc_floorf:
4172 case LibFunc_floorl:
4173 return Intrinsic::floor;
4174 case LibFunc_ceil:
4175 case LibFunc_ceilf:
4176 case LibFunc_ceill:
4177 return Intrinsic::ceil;
4178 case LibFunc_trunc:
4179 case LibFunc_truncf:
4180 case LibFunc_truncl:
4181 return Intrinsic::trunc;
4182 case LibFunc_rint:
4183 case LibFunc_rintf:
4184 case LibFunc_rintl:
4185 return Intrinsic::rint;
4186 case LibFunc_nearbyint:
4187 case LibFunc_nearbyintf:
4188 case LibFunc_nearbyintl:
4189 return Intrinsic::nearbyint;
4190 case LibFunc_round:
4191 case LibFunc_roundf:
4192 case LibFunc_roundl:
4193 return Intrinsic::round;
4194 case LibFunc_roundeven:
4195 case LibFunc_roundevenf:
4196 case LibFunc_roundevenl:
4197 return Intrinsic::roundeven;
4198 case LibFunc_pow:
4199 case LibFunc_powf:
4200 case LibFunc_powl:
4201 return Intrinsic::pow;
4202 case LibFunc_sqrt:
4203 case LibFunc_sqrtf:
4204 case LibFunc_sqrtl:
4205 return Intrinsic::sqrt;
4206 }
4207
4208 return Intrinsic::not_intrinsic;
4209 }
4210
4211/// Return true if it's possible to assume IEEE treatment of input denormals in
4212/// \p F for \p Val.
4213static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
4214 Ty = Ty->getScalarType();
4215 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
4216}
4217
4218static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4219 Ty = Ty->getScalarType();
4220 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4221 return Mode.Input == DenormalMode::IEEE ||
4222 Mode.Input == DenormalMode::PositiveZero;
4223}
4224
4225static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4226 Ty = Ty->getScalarType();
4227 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4228 return Mode.Output == DenormalMode::IEEE ||
4229 Mode.Output == DenormalMode::PositiveZero;
4230}
4231
4232 bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const {
4233 return isKnownNeverZero() &&
4234 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty));
4235 }
4236
4237 bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F,
4238 Type *Ty) const {
4239 return isKnownNeverNegZero() &&
4240 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty));
4241 }
4242
4243 bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F,
4244 Type *Ty) const {
4245 if (!isKnownNeverPosZero())
4246 return false;
4247
4248 // If we know there are no denormals, nothing can be flushed to zero.
4249 if (isKnownNeverSubnormal())
4250 return true;
4251
4252 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4253 switch (Mode.Input) {
4254 case DenormalMode::IEEE:
4255 return true;
4256 case DenormalMode::PreserveSign:
4257 // Negative subnormal won't flush to +0
4258 return isKnownNeverPosSubnormal();
4259 case DenormalMode::PositiveZero:
4260 default:
4261 // Both positive and negative subnormal could flush to +0
4262 return false;
4263 }
4264
4265 llvm_unreachable("covered switch over denormal mode");
4266}
4267
4268 void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F,
4269 Type *Ty) {
4270 KnownFPClasses = Src.KnownFPClasses;
4271 // If the source may already be either sign of zero, a flushed denormal cannot
4272 // introduce a zero that was not possible before, so there is nothing to check.
4273 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero())
4274 return;
4275
4276 // If we know the input can't be a denormal, it can't be flushed to 0.
4277 if (Src.isKnownNeverSubnormal())
4278 return;
4279
4280 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4281
4282 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE())
4283 KnownFPClasses |= fcPosZero;
4284 
4285 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) {
4286 if (Mode != DenormalMode::getPositiveZero())
4287 KnownFPClasses |= fcNegZero;
4288 
4289 if (Mode.Input == DenormalMode::PositiveZero ||
4290 Mode.Output == DenormalMode::PositiveZero ||
4291 Mode.Input == DenormalMode::Dynamic ||
4292 Mode.Output == DenormalMode::Dynamic)
4293 KnownFPClasses |= fcPosZero;
4294 }
4295}
4296
4297 void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src,
4298 const Function &F, Type *Ty) {
4299 propagateDenormal(Src, F, Ty);
4300 propagateNaN(Src, /*PreserveSign=*/true);
4301}
4302
4303/// Given an exploded icmp instruction, return true if the comparison only
4304/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if
4305/// the result of the comparison is true when the input value is signed.
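/// For example, for an i8 value X, (icmp ugt X, 127) is true exactly when the
/// sign bit of X is set, and (icmp sgt X, -1) is true exactly when it is clear.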
4306 bool llvm::isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
4307 bool &TrueIfSigned) {
4308 switch (Pred) {
4309 case ICmpInst::ICMP_SLT: // True if LHS s< 0
4310 TrueIfSigned = true;
4311 return RHS.isZero();
4312 case ICmpInst::ICMP_SLE: // True if LHS s<= -1
4313 TrueIfSigned = true;
4314 return RHS.isAllOnes();
4315 case ICmpInst::ICMP_SGT: // True if LHS s> -1
4316 TrueIfSigned = false;
4317 return RHS.isAllOnes();
4318 case ICmpInst::ICMP_SGE: // True if LHS s>= 0
4319 TrueIfSigned = false;
4320 return RHS.isZero();
4321 case ICmpInst::ICMP_UGT:
4322 // True if LHS u> RHS and RHS == sign-bit-mask - 1
4323 TrueIfSigned = true;
4324 return RHS.isMaxSignedValue();
4325 case ICmpInst::ICMP_UGE:
4326 // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4327 TrueIfSigned = true;
4328 return RHS.isMinSignedValue();
4329 case ICmpInst::ICMP_ULT:
4330 // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4331 TrueIfSigned = false;
4332 return RHS.isMinSignedValue();
4333 case ICmpInst::ICMP_ULE:
4334 // True if LHS u<= RHS and RHS == sign-bit-mask - 1
4335 TrueIfSigned = false;
4336 return RHS.isMaxSignedValue();
4337 default:
4338 return false;
4339 }
4340}
4341
4342/// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
4343/// same result as an fcmp with the given operands.
4344std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
4345 const Function &F,
4346 Value *LHS, Value *RHS,
4347 bool LookThroughSrc) {
4348 const APFloat *ConstRHS;
4349 if (!match(RHS, m_APFloatAllowPoison(ConstRHS)))
4350 return {nullptr, fcAllFlags};
4351
4352 return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc);
4353}
4354
4355std::pair<Value *, FPClassTest>
4356 llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS,
4357 const APFloat *ConstRHS, bool LookThroughSrc) {
4358
4359 auto [Src, ClassIfTrue, ClassIfFalse] =
4360 fcmpImpliesClass(Pred, F, LHS, *ConstRHS, LookThroughSrc);
4361 if (Src && ClassIfTrue == ~ClassIfFalse)
4362 return {Src, ClassIfTrue};
4363 return {nullptr, fcAllFlags};
4364}
4365
4366/// Return the return value for fcmpImpliesClass for a compare that produces an
4367/// exact class test.
4368static std::tuple<Value *, FPClassTest, FPClassTest> exactClass(Value *V,
4369 FPClassTest M) {
4370 return {V, M, ~M};
4371}
4372
4373std::tuple<Value *, FPClassTest, FPClassTest>
4374 llvm::fcmpImpliesClass(FCmpInst::Predicate Pred, const Function &F, Value *LHS,
4375 FPClassTest RHSClass, bool LookThroughSrc) {
4376 assert(RHSClass != fcNone);
4377 Value *Src = LHS;
4378
4379 if (Pred == FCmpInst::FCMP_TRUE)
4380 return exactClass(Src, fcAllFlags);
4381
4382 if (Pred == FCmpInst::FCMP_FALSE)
4383 return exactClass(Src, fcNone);
4384
4385 const FPClassTest OrigClass = RHSClass;
4386
4387 const bool IsNegativeRHS = (RHSClass & fcNegative) == RHSClass;
4388 const bool IsPositiveRHS = (RHSClass & fcPositive) == RHSClass;
4389 const bool IsNaN = (RHSClass & ~fcNan) == fcNone;
4390
4391 if (IsNaN) {
4392 // fcmp o__ x, nan -> false
4393 // fcmp u__ x, nan -> true
4394 return exactClass(Src, CmpInst::isOrdered(Pred) ? fcNone : fcAllFlags);
4395 }
4396
4397 // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan
4398 if (Pred == FCmpInst::FCMP_ORD)
4399 return exactClass(Src, ~fcNan);
4400
4401 // fcmp uno x, zero|normal|subnormal|inf -> fcNan
4402 if (Pred == FCmpInst::FCMP_UNO)
4403 return exactClass(Src, fcNan);
4404
4405 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src)));
4406 if (IsFabs)
4407 RHSClass = llvm::inverse_fabs(RHSClass);
4408
4409 const bool IsZero = (OrigClass & fcZero) == OrigClass;
4410 if (IsZero) {
4411 assert(Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO);
4412 // A compare against zero is only an exact fcZero class test if input denormals
4413 // are not flushed.
4414 // TODO: Handle DAZ by expanding masks to cover subnormal cases.
4415 if (!inputDenormalIsIEEE(F, LHS->getType()))
4416 return {nullptr, fcAllFlags, fcAllFlags};
4417
4418 switch (Pred) {
4419 case FCmpInst::FCMP_OEQ: // Match x == 0.0
4420 return exactClass(Src, fcZero);
4421 case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0)
4422 return exactClass(Src, fcZero | fcNan);
4423 case FCmpInst::FCMP_UNE: // Match (x != 0.0)
4424 return exactClass(Src, ~fcZero);
4425 case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0
4426 return exactClass(Src, ~fcNan & ~fcZero);
4427 case FCmpInst::FCMP_ORD:
4428 // Canonical form of ord/uno is with a zero. We could also handle
4429 // non-canonical other non-NaN constants or LHS == RHS.
4430 return exactClass(Src, ~fcNan);
4431 case FCmpInst::FCMP_UNO:
4432 return exactClass(Src, fcNan);
4433 case FCmpInst::FCMP_OGT: // x > 0
4434 return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
4435 case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
4436 return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
4437 case FCmpInst::FCMP_OGE: // x >= 0
4438