1//===- ValueTracking.cpp - Walk computations to compute properties --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains routines that help analyze properties that chains of
10// computations have.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/APFloat.h"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/ScopeExit.h"
21#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/StringRef.h"
32#include "llvm/Analysis/Loads.h"
37#include "llvm/IR/Argument.h"
38#include "llvm/IR/Attributes.h"
39#include "llvm/IR/BasicBlock.h"
40#include "llvm/IR/Constant.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/Dominators.h"
47#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/IntrinsicsAMDGPU.h"
59#include "llvm/IR/IntrinsicsRISCV.h"
60#include "llvm/IR/IntrinsicsX86.h"
61#include "llvm/IR/LLVMContext.h"
62#include "llvm/IR/Metadata.h"
63#include "llvm/IR/Module.h"
64#include "llvm/IR/Operator.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/User.h"
68#include "llvm/IR/Value.h"
76#include <algorithm>
77#include <cassert>
78#include <cstdint>
79#include <optional>
80#include <utility>
81
82using namespace llvm;
83using namespace llvm::PatternMatch;
84
85// Controls the number of uses of the value searched for possible
86// dominating comparisons.
87static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
88 cl::Hidden, cl::init(20));
89
90
91/// Returns the bitwidth of the given scalar or pointer type. For vector types,
92/// returns the element type's bitwidth.
93static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
94 if (unsigned BitWidth = Ty->getScalarSizeInBits())
95 return BitWidth;
96
97 return DL.getPointerTypeSizeInBits(Ty);
98}
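// For example, getBitWidth(<4 x i32>, DL) returns 32 (the element width),
// while for a pointer type it falls back to DL.getPointerTypeSizeInBits, e.g.
// 64 on a typical 64-bit target.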
99
100// Given the provided Value and, potentially, a context instruction, return
101// the preferred context instruction (if any).
102static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
103 // If we've been provided with a context instruction, then use that (provided
104 // it has been inserted).
105 if (CxtI && CxtI->getParent())
106 return CxtI;
107
108 // If the value is really an already-inserted instruction, then use that.
109 CxtI = dyn_cast<Instruction>(V);
110 if (CxtI && CxtI->getParent())
111 return CxtI;
112
113 return nullptr;
114}
115
116static const Instruction *safeCxtI(const Value *V1, const Value *V2, const Instruction *CxtI) {
117 // If we've been provided with a context instruction, then use that (provided
118 // it has been inserted).
119 if (CxtI && CxtI->getParent())
120 return CxtI;
121
122 // If the value is really an already-inserted instruction, then use that.
123 CxtI = dyn_cast<Instruction>(V1);
124 if (CxtI && CxtI->getParent())
125 return CxtI;
126
127 CxtI = dyn_cast<Instruction>(V2);
128 if (CxtI && CxtI->getParent())
129 return CxtI;
130
131 return nullptr;
132}
133
134static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
135 const APInt &DemandedElts,
136 APInt &DemandedLHS, APInt &DemandedRHS) {
137 if (isa<ScalableVectorType>(Shuf->getType())) {
138 assert(DemandedElts == APInt(1,1));
139 DemandedLHS = DemandedRHS = DemandedElts;
140 return true;
141 }
142
143 int NumElts =
144 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
145 return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
146 DemandedElts, DemandedLHS, DemandedRHS);
147}
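// Illustrative example: for a shuffle of two <4 x i32> sources with mask
// <0, 4, 1, 5> and DemandedElts = 0b0011 (result lanes 0 and 1 demanded),
// lane 0 reads LHS[0] and lane 1 reads RHS[0], so both DemandedLHS and
// DemandedRHS come back as 0b0001.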
148
149static void computeKnownBits(const Value *V, const APInt &DemandedElts,
150 KnownBits &Known, unsigned Depth,
151 const SimplifyQuery &Q);
152
153void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
154 const SimplifyQuery &Q) {
155 // Since the number of lanes in a scalable vector is unknown at compile time,
156 // we track one bit which is implicitly broadcast to all lanes. This means
157 // that all lanes in a scalable vector are considered demanded.
158 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
159 APInt DemandedElts =
160 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
161 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
162}
163
164void llvm::computeKnownBits(const Value *V, KnownBits &Known,
165 const DataLayout &DL, unsigned Depth,
166 AssumptionCache *AC, const Instruction *CxtI,
167 const DominatorTree *DT, bool UseInstrInfo) {
168 computeKnownBits(
169 V, Known, Depth,
170 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
171}
172
173KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
174 unsigned Depth, AssumptionCache *AC,
175 const Instruction *CxtI,
176 const DominatorTree *DT, bool UseInstrInfo) {
177 return computeKnownBits(
178 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
179}
180
181KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
182 const DataLayout &DL, unsigned Depth,
183 AssumptionCache *AC, const Instruction *CxtI,
184 const DominatorTree *DT, bool UseInstrInfo) {
185 return computeKnownBits(
186 V, DemandedElts, Depth,
187 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
188}
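// Minimal usage sketch (assuming the default arguments declared in
// ValueTracking.h):
//   KnownBits Known = computeKnownBits(V, DL);
//   if (Known.Zero[0])
//     ...; // V is known to be even.
// Passing an AssumptionCache, context instruction and DominatorTree only
// sharpens the result; it never makes it less precise.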
189
190static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
191 const SimplifyQuery &SQ) {
192 // Look for an inverted mask: (X & ~M) op (Y & M).
193 {
194 Value *M;
195 if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
196 match(RHS, m_c_And(m_Specific(M), m_Value())) &&
197 isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
198 return true;
199 }
200
201 // X op (Y & ~X)
202 if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
203 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
204 return true;
205
206 // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
207 // for constant Y.
208 Value *Y;
209 if (match(RHS,
210 m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
211 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
212 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
213 return true;
214
215 // Peek through extends to find a 'not' of the other side:
216 // (ext Y) op ext(~Y)
217 if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
218 match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
219 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
220 return true;
221
222 // Look for: (A & B) op ~(A | B)
223 {
224 Value *A, *B;
225 if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
226 match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
227 isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
228 isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
229 return true;
230 }
231
232 return false;
233}
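// Worked example for the inverted-mask case above: with M = 0x0F, LHS = X & ~M
// can only have bits set in 0xF0 and RHS = Y & M only in 0x0F, so the two
// sides can never share a set bit (M must not be undef, hence the
// isGuaranteedNotToBeUndef check).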
234
235bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
236 const WithCache<const Value *> &RHSCache,
237 const SimplifyQuery &SQ) {
238 const Value *LHS = LHSCache.getValue();
239 const Value *RHS = RHSCache.getValue();
240
241 assert(LHS->getType() == RHS->getType() &&
242 "LHS and RHS should have the same type");
243 assert(LHS->getType()->isIntOrIntVectorTy() &&
244 "LHS and RHS should be integers");
245
246 if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
247 haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
248 return true;
249
250 return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
251 RHSCache.getKnownBits(SQ));
252}
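// For instance, (%a << 4) and (%b & 15) have no common bits set, which is what
// lets callers such as InstCombine rewrite an `add` of the two values as a
// disjoint `or`.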
253
254bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
255 return !I->user_empty() && all_of(I->users(), [](const User *U) {
256 return match(U, m_ICmp(m_Value(), m_Zero()));
257 });
258}
259
260bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
261 return !I->user_empty() && all_of(I->users(), [](const User *U) {
262 CmpPredicate P;
263 return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
264 });
265}
266
267bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
268 bool OrZero, unsigned Depth,
269 AssumptionCache *AC, const Instruction *CxtI,
270 const DominatorTree *DT, bool UseInstrInfo) {
271 return ::isKnownToBeAPowerOfTwo(
272 V, OrZero, Depth,
273 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
274}
275
276static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
277 const SimplifyQuery &Q, unsigned Depth);
278
279bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
280 unsigned Depth) {
281 return computeKnownBits(V, Depth, SQ).isNonNegative();
282}
283
284bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
285 unsigned Depth) {
286 if (auto *CI = dyn_cast<ConstantInt>(V))
287 return CI->getValue().isStrictlyPositive();
288
289 // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
290 // this updated.
291 KnownBits Known = computeKnownBits(V, Depth, SQ);
292 return Known.isNonNegative() &&
293 (Known.isNonZero() || isKnownNonZero(V, SQ, Depth));
294}
295
296bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
297 unsigned Depth) {
298 return computeKnownBits(V, Depth, SQ).isNegative();
299}
300
301static bool isKnownNonEqual(const Value *V1, const Value *V2,
302 const APInt &DemandedElts, unsigned Depth,
303 const SimplifyQuery &Q);
304
305bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
306 const DataLayout &DL, AssumptionCache *AC,
307 const Instruction *CxtI, const DominatorTree *DT,
308 bool UseInstrInfo) {
309 // We don't support looking through casts.
310 if (V1 == V2 || V1->getType() != V2->getType())
311 return false;
312 auto *FVTy = dyn_cast<FixedVectorType>(V1->getType());
313 APInt DemandedElts =
314 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
315 return ::isKnownNonEqual(
316 V1, V2, DemandedElts, 0,
317 SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo));
318}
319
320bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
321 const SimplifyQuery &SQ, unsigned Depth) {
322 KnownBits Known(Mask.getBitWidth());
323 computeKnownBits(V, Known, Depth, SQ);
324 return Mask.isSubsetOf(Known.Zero);
325}
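// Example: if V is known to be a multiple of 4 (two trailing zero bits),
// MaskedValueIsZero(V, APInt(BitWidth, 3), SQ) returns true because both mask
// bits lie in Known.Zero.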
326
327static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
328 unsigned Depth, const SimplifyQuery &Q);
329
330static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
331 const SimplifyQuery &Q) {
332 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
333 APInt DemandedElts =
334 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
335 return ComputeNumSignBits(V, DemandedElts, Depth, Q);
336}
337
338unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
339 unsigned Depth, AssumptionCache *AC,
340 const Instruction *CxtI,
341 const DominatorTree *DT, bool UseInstrInfo) {
342 return ::ComputeNumSignBits(
343 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
344}
345
346unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
347 unsigned Depth, AssumptionCache *AC,
348 const Instruction *CxtI,
349 const DominatorTree *DT) {
350 unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
351 return V->getType()->getScalarSizeInBits() - SignBits + 1;
352}
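// Worked example: for an i32 produced by sign-extending an i9,
// ComputeNumSignBits reports at least 24 sign bits, so this returns
// 32 - 24 + 1 = 9 significant bits.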
353
354static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
355 bool NSW, bool NUW,
356 const APInt &DemandedElts,
357 KnownBits &KnownOut, KnownBits &Known2,
358 unsigned Depth, const SimplifyQuery &Q) {
359 computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);
360
361 // If one operand is unknown and we have no nowrap information,
362 // the result will be unknown independently of the second operand.
363 if (KnownOut.isUnknown() && !NSW && !NUW)
364 return;
365
366 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
367 KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, Known2, KnownOut);
368}
369
370static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
371 bool NUW, const APInt &DemandedElts,
372 KnownBits &Known, KnownBits &Known2,
373 unsigned Depth, const SimplifyQuery &Q) {
374 computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
375 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
376
377 bool isKnownNegative = false;
378 bool isKnownNonNegative = false;
379 // If the multiplication is known not to overflow, compute the sign bit.
380 if (NSW) {
381 if (Op0 == Op1) {
382 // The product of a number with itself is non-negative.
383 isKnownNonNegative = true;
384 } else {
385 bool isKnownNonNegativeOp1 = Known.isNonNegative();
386 bool isKnownNonNegativeOp0 = Known2.isNonNegative();
387 bool isKnownNegativeOp1 = Known.isNegative();
388 bool isKnownNegativeOp0 = Known2.isNegative();
389 // The product of two numbers with the same sign is non-negative.
390 isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
391 (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
392 if (!isKnownNonNegative && NUW) {
393 // mul nuw nsw with a factor > 1 is non-negative.
394 KnownBits One = KnownBits::makeConstant(APInt(Known.getBitWidth(), 1));
395 isKnownNonNegative = KnownBits::sgt(Known, One).value_or(false) ||
396 KnownBits::sgt(Known2, One).value_or(false);
397 }
398
399 // The product of a negative number and a non-negative number is either
400 // negative or zero.
401 if (!isKnownNonNegative)
402 isKnownNegative =
403 (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
404 Known2.isNonZero()) ||
405 (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
406 }
407 }
408
409 bool SelfMultiply = Op0 == Op1;
410 if (SelfMultiply)
411 SelfMultiply &=
412 isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
413 Known = KnownBits::mul(Known, Known2, SelfMultiply);
414
415 // Only make use of no-wrap flags if we failed to compute the sign bit
416 // directly. This matters if the multiplication always overflows, in
417 // which case we prefer to follow the result of the direct computation,
418 // though as the program is invoking undefined behaviour we can choose
419 // whatever we like here.
420 if (isKnownNonNegative && !Known.isNegative())
421 Known.makeNonNegative();
422 else if (isKnownNegative && !Known.isNonNegative())
423 Known.makeNegative();
424}
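// Illustrative example: if Op0 is known to be a multiple of 4 (two trailing
// zeros) and Op1 a multiple of 2 (one trailing zero), KnownBits::mul reports at
// least three trailing zeros for the product; with nsw, the sign reasoning
// above can additionally mark a self-multiply as non-negative.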
425
426void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
427 KnownBits &Known) {
428 unsigned BitWidth = Known.getBitWidth();
429 unsigned NumRanges = Ranges.getNumOperands() / 2;
430 assert(NumRanges >= 1);
431
432 Known.Zero.setAllBits();
433 Known.One.setAllBits();
434
435 for (unsigned i = 0; i < NumRanges; ++i) {
436 ConstantInt *Lower =
437 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
438 ConstantInt *Upper =
439 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
440 ConstantRange Range(Lower->getValue(), Upper->getValue());
441
442 // The first CommonPrefixBits of all values in Range are equal.
443 unsigned CommonPrefixBits =
444 (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
445 APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
446 APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
447 Known.One &= UnsignedMax & Mask;
448 Known.Zero &= ~UnsignedMax & Mask;
449 }
450}
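// Worked example: !range metadata {i8 64, i8 128} describes the values 64..127,
// i.e. 0b01xxxxxx. UnsignedMax ^ UnsignedMin = 0b00111111 has two leading
// zeros, so CommonPrefixBits = 2 and the loop records bit 7 as known zero and
// bit 6 as known one.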
451
452static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
453 SmallVector<const Value *, 16> WorkSet(1, I);
454 SmallPtrSet<const Value *, 32> Visited;
455 SmallPtrSet<const Value *, 16> EphValues;
456
457 // The instruction defining an assumption's condition itself is always
458 // considered ephemeral to that assumption (even if it has other
459 // non-ephemeral users). See r246696's test case for an example.
460 if (is_contained(I->operands(), E))
461 return true;
462
463 while (!WorkSet.empty()) {
464 const Value *V = WorkSet.pop_back_val();
465 if (!Visited.insert(V).second)
466 continue;
467
468 // If all uses of this value are ephemeral, then so is this value.
469 if (llvm::all_of(V->users(), [&](const User *U) {
470 return EphValues.count(U);
471 })) {
472 if (V == E)
473 return true;
474
475 if (V == I || (isa<Instruction>(V) &&
476 !cast<Instruction>(V)->mayHaveSideEffects() &&
477 !cast<Instruction>(V)->isTerminator())) {
478 EphValues.insert(V);
479 if (const User *U = dyn_cast<User>(V))
480 append_range(WorkSet, U->operands());
481 }
482 }
483 }
484
485 return false;
486}
487
488// Is this an intrinsic that cannot be speculated but also cannot trap?
489bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
490 if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
491 return CI->isAssumeLikeIntrinsic();
492
493 return false;
494}
495
496bool llvm::isValidAssumeForContext(const Instruction *Inv,
497 const Instruction *CxtI,
498 const DominatorTree *DT,
499 bool AllowEphemerals) {
500 // There are two restrictions on the use of an assume:
501 // 1. The assume must dominate the context (or the control flow must
502 // reach the assume whenever it reaches the context).
503 // 2. The context must not be in the assume's set of ephemeral values
504 // (otherwise we will use the assume to prove that the condition
505 // feeding the assume is trivially true, thus causing the removal of
506 // the assume).
507
508 if (Inv->getParent() == CxtI->getParent()) {
509 // If Inv and CtxI are in the same block, check if the assume (Inv) is first
510 // in the BB.
511 if (Inv->comesBefore(CxtI))
512 return true;
513
514 // Don't let an assume affect itself - this would cause the problems
515 // `isEphemeralValueOf` is trying to prevent, and it would also make
516 // the loop below go out of bounds.
517 if (!AllowEphemerals && Inv == CxtI)
518 return false;
519
520 // The context comes first, but they're both in the same block.
521 // Make sure there is nothing in between that might interrupt
522 // the control flow, not even CxtI itself.
523 // We limit the scan distance between the assume and its context instruction
524 // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
525 // it can be adjusted if needed (could be turned into a cl::opt).
526 auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
527 if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
528 return false;
529
530 return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI);
531 }
532
533 // Inv and CxtI are in different blocks.
534 if (DT) {
535 if (DT->dominates(Inv, CxtI))
536 return true;
537 } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor() ||
538 Inv->getParent()->isEntryBlock()) {
539 // We don't have a DT, but this trivially dominates.
540 return true;
541 }
542
543 return false;
544}
545
546// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
547// we still have enough information about `RHS` to conclude non-zero. For
548// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
549// so the extra compile time may not be worth it, but possibly a second API
550// should be created for use outside of loops.
551static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
552 // v u> y implies v != 0.
553 if (Pred == ICmpInst::ICMP_UGT)
554 return true;
555
556 // Special-case v != 0 to also handle v != null.
557 if (Pred == ICmpInst::ICMP_NE)
558 return match(RHS, m_Zero());
559
560 // All other predicates - rely on generic ConstantRange handling.
561 const APInt *C;
562 auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
563 if (match(RHS, m_APInt(C))) {
564 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
565 return !TrueValues.contains(Zero);
566 }
567
568 auto *VC = dyn_cast<ConstantDataVector>(RHS);
569 if (VC == nullptr)
570 return false;
571
572 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
573 ++ElemIdx) {
574 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
575 Pred, VC->getElementAsAPInt(ElemIdx));
576 if (TrueValues.contains(Zero))
577 return false;
578 }
579 return true;
580}
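// Examples: `icmp ugt %v, %y` always excludes zero (true for any RHS);
// `icmp sgt %v, 5` maps to the signed range [6, INT_MIN), which does not
// contain zero, so it also returns true; `icmp ult %v, 8` gives [0, 8), which
// contains zero, so it returns false.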
581
582static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
583 // Use of assumptions is context-sensitive. If we don't have a context, we
584 // cannot use them!
585 if (!Q.AC || !Q.CxtI)
586 return false;
587
588 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
589 if (!Elem.Assume)
590 continue;
591
592 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
593 assert(I->getFunction() == Q.CxtI->getFunction() &&
594 "Got assumption for the wrong function!");
595
596 if (Elem.Index != AssumptionCache::ExprResultIdx) {
597 if (!V->getType()->isPointerTy())
598 continue;
599 if (RetainedKnowledge RK = getKnowledgeFromBundle(
600 *I, I->bundle_op_info_begin()[Elem.Index])) {
601 if (RK.WasOn == V &&
602 (RK.AttrKind == Attribute::NonNull ||
603 (RK.AttrKind == Attribute::Dereferenceable &&
604 !NullPointerIsDefined(Q.CxtI->getFunction(),
605 V->getType()->getPointerAddressSpace()))) &&
606 isValidAssumeForContext(I, Q.CxtI, Q.DT))
607 return true;
608 }
609 continue;
610 }
611
612 // Warning: This loop can end up being somewhat performance sensitive.
613 // We're running this loop once for each value queried, resulting in a
614 // runtime of ~O(#assumes * #values).
615
616 Value *RHS;
617 CmpPredicate Pred;
618 auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
619 if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
620 continue;
621
622 if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
623 return true;
624 }
625
626 return false;
627}
628
629static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
630 Value *LHS, Value *RHS, KnownBits &Known,
631 const SimplifyQuery &Q) {
632 if (RHS->getType()->isPointerTy()) {
633 // Handle comparison of pointer to null explicitly, as it will not be
634 // covered by the m_APInt() logic below.
635 if (LHS == V && match(RHS, m_Zero())) {
636 switch (Pred) {
637 case ICmpInst::ICMP_EQ:
638 Known.setAllZero();
639 break;
640 case ICmpInst::ICMP_SGE:
641 case ICmpInst::ICMP_SGT:
642 Known.makeNonNegative();
643 break;
644 case ICmpInst::ICMP_SLT:
645 Known.makeNegative();
646 break;
647 default:
648 break;
649 }
650 }
651 return;
652 }
653
654 unsigned BitWidth = Known.getBitWidth();
655 auto m_V =
656 m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));
657
658 Value *Y;
659 const APInt *Mask, *C;
660 uint64_t ShAmt;
661 switch (Pred) {
662 case ICmpInst::ICMP_EQ:
663 // assume(V = C)
664 if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
665 Known = Known.unionWith(KnownBits::makeConstant(*C));
666 // assume(V & Mask = C)
667 } else if (match(LHS, m_c_And(m_V, m_Value(Y))) &&
668 match(RHS, m_APInt(C))) {
669 // For one bits in Mask, we can propagate bits from C to V.
670 Known.One |= *C;
671 if (match(Y, m_APInt(Mask)))
672 Known.Zero |= ~*C & *Mask;
673 // assume(V | Mask = C)
674 } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) {
675 // For zero bits in Mask, we can propagate bits from C to V.
676 Known.Zero |= ~*C;
677 if (match(Y, m_APInt(Mask)))
678 Known.One |= *C & ~*Mask;
679 // assume(V ^ Mask = C)
680 } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
681 match(RHS, m_APInt(C))) {
682 // Equivalent to assume(V == Mask ^ C)
683 Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
684 // assume(V << ShAmt = C)
685 } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
686 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
687 // For those bits in C that are known, we can propagate them to known
688 // bits in V shifted to the right by ShAmt.
689 KnownBits RHSKnown = KnownBits::makeConstant(*C);
690 RHSKnown.Zero.lshrInPlace(ShAmt);
691 RHSKnown.One.lshrInPlace(ShAmt);
692 Known = Known.unionWith(RHSKnown);
693 // assume(V >> ShAmt = C)
694 } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
695 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
696 KnownBits RHSKnown = KnownBits::makeConstant(*C);
697 // For those bits in RHS that are known, we can propagate them to known
698 // bits in V shifted to the right by C.
699 Known.Zero |= RHSKnown.Zero << ShAmt;
700 Known.One |= RHSKnown.One << ShAmt;
701 }
702 break;
703 case ICmpInst::ICMP_NE: {
704 // assume (V & B != 0) where B is a power of 2
705 const APInt *BPow2;
706 if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
707 Known.One |= *BPow2;
708 break;
709 }
710 default:
711 if (match(RHS, m_APInt(C))) {
712 const APInt *Offset = nullptr;
713 if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
714 ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
715 if (Offset)
716 LHSRange = LHSRange.sub(*Offset);
717 Known = Known.unionWith(LHSRange.toKnownBits());
718 }
719 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
720 // X & Y u> C -> X u> C && Y u> C
721 // X nuw- Y u> C -> X u> C
722 if (match(LHS, m_c_And(m_V, m_Value())) ||
723 match(LHS, m_NUWSub(m_V, m_Value())))
724 Known.One.setHighBits(
725 (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
726 }
727 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
728 // X | Y u< C -> X u< C && Y u< C
729 // X nuw+ Y u< C -> X u< C && Y u< C
730 if (match(LHS, m_c_Or(m_V, m_Value())) ||
731 match(LHS, m_c_NUWAdd(m_V, m_Value()))) {
732 Known.Zero.setHighBits(
733 (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
734 }
735 }
736 }
737 break;
738 }
739}
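// Worked example for the ICMP_EQ case: given assume((V & 12) == 8), the code
// above sets Known.One |= 8 and, because the mask 12 is a constant,
// Known.Zero |= ~8 & 12 = 4, i.e. bit 3 of V is known one and bit 2 known zero.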
740
741static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
742 KnownBits &Known,
743 const SimplifyQuery &SQ, bool Invert) {
744 ICmpInst::Predicate Pred =
745 Invert ? Cmp->getInversePredicate() : Cmp->getPredicate();
746 Value *LHS = Cmp->getOperand(0);
747 Value *RHS = Cmp->getOperand(1);
748
749 // Handle icmp pred (trunc V), C
750 if (match(LHS, m_Trunc(m_Specific(V)))) {
751 KnownBits DstKnown(LHS->getType()->getScalarSizeInBits());
752 computeKnownBitsFromCmp(LHS, Pred, LHS, RHS, DstKnown, SQ);
753 Known = Known.unionWith(DstKnown.anyext(Known.getBitWidth()));
754 return;
755 }
756
757 computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, SQ);
758}
759
760static void computeKnownBitsFromCond(const Value *V, Value *Cond,
761 KnownBits &Known, unsigned Depth,
762 const SimplifyQuery &SQ, bool Invert) {
763 Value *A, *B;
764 if (Depth < MaxAnalysisRecursionDepth &&
765 match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
766 KnownBits Known2(Known.getBitWidth());
767 KnownBits Known3(Known.getBitWidth());
768 computeKnownBitsFromCond(V, A, Known2, Depth + 1, SQ, Invert);
769 computeKnownBitsFromCond(V, B, Known3, Depth + 1, SQ, Invert);
770 if (Invert ? match(Cond, m_LogicalOr(m_Value(), m_Value()))
771 : match(Cond, m_LogicalAnd(m_Value(), m_Value())))
772 Known2 = Known2.unionWith(Known3);
773 else
774 Known2 = Known2.intersectWith(Known3);
775 Known = Known.unionWith(Known2);
776 }
777
778 if (auto *Cmp = dyn_cast<ICmpInst>(Cond))
779 computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
780}
781
782void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
783 unsigned Depth, const SimplifyQuery &Q) {
784 // Handle injected condition.
785 if (Q.CC && Q.CC->AffectedValues.contains(V))
786 computeKnownBitsFromCond(V, Q.CC->Cond, Known, Depth, Q, Q.CC->Invert);
787
788 if (!Q.CxtI)
789 return;
790
791 if (Q.DC && Q.DT) {
792 // Handle dominating conditions.
793 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
794 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
795 if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
796 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
797 /*Invert*/ false);
798
799 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
800 if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
801 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
802 /*Invert*/ true);
803 }
804
805 if (Known.hasConflict())
806 Known.resetAll();
807 }
808
809 if (!Q.AC)
810 return;
811
812 unsigned BitWidth = Known.getBitWidth();
813
814 // Note that the patterns below need to be kept in sync with the code
815 // in AssumptionCache::updateAffectedValues.
816
817 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
818 if (!Elem.Assume)
819 continue;
820
821 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
822 assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
823 "Got assumption for the wrong function!");
824
825 if (Elem.Index != AssumptionCache::ExprResultIdx) {
826 if (!V->getType()->isPointerTy())
827 continue;
828 if (RetainedKnowledge RK = getKnowledgeFromBundle(
829 *I, I->bundle_op_info_begin()[Elem.Index])) {
830 // Allow AllowEphemerals in isValidAssumeForContext, as the CxtI might
831 // be the producer of the pointer in the bundle. At the moment, align
832 // assumptions aren't optimized away.
833 if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
834 isPowerOf2_64(RK.ArgValue) &&
835 isValidAssumeForContext(I, Q.CxtI, Q.DT, /*AllowEphemerals*/ true))
836 Known.Zero.setLowBits(Log2_64(RK.ArgValue));
837 }
838 continue;
839 }
840
841 // Warning: This loop can end up being somewhat performance sensitive.
842 // We're running this loop once for each value queried, resulting in a
843 // runtime of ~O(#assumes * #values).
844
845 Value *Arg = I->getArgOperand(0);
846
847 if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
848 assert(BitWidth == 1 && "assume operand is not i1?");
849 (void)BitWidth;
850 Known.setAllOnes();
851 return;
852 }
853 if (match(Arg, m_Not(m_Specific(V))) &&
854 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
855 assert(BitWidth == 1 && "assume operand is not i1?");
856 (void)BitWidth;
857 Known.setAllZero();
858 return;
859 }
860
861 // The remaining tests are all recursive, so bail out if we hit the limit.
862 if (Depth == MaxAnalysisRecursionDepth)
863 continue;
864
865 ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
866 if (!Cmp)
867 continue;
868
869 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
870 continue;
871
872 computeKnownBitsFromICmpCond(V, Cmp, Known, Q, /*Invert=*/false);
873 }
874
875 // Conflicting assumption: Undefined behavior will occur on this execution
876 // path.
877 if (Known.hasConflict())
878 Known.resetAll();
879}
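// For example, an alignment assumption bundle such as
//   call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 16) ]
// makes the low Log2_64(16) = 4 bits of %p known zero at any context for which
// the assume is valid.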
880
881/// Compute known bits from a shift operator, including those with a
882/// non-constant shift amount. Known is the output of this function. Known2 is a
883/// pre-allocated temporary with the same bit width as Known and on return
884/// contains the known bit of the shift value source. KF is an
885/// operator-specific function that, given the known bits and a shift amount,
886/// computes the implied known bits of the shift operator's result for that
887/// shift amount. The results from calling KF are conservatively
888/// combined for all permitted shift amounts.
889static void computeKnownBitsFromShiftOperator(
890 const Operator *I, const APInt &DemandedElts, KnownBits &Known,
891 KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
892 function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
893 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
894 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
895 // To limit compile-time impact, only query isKnownNonZero() if we know at
896 // least something about the shift amount.
897 bool ShAmtNonZero =
898 Known.isNonZero() ||
899 (Known.getMaxValue().ult(Known.getBitWidth()) &&
900 isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1));
901 Known = KF(Known2, Known, ShAmtNonZero);
902}
903
904static KnownBits
905getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
906 const KnownBits &KnownLHS, const KnownBits &KnownRHS,
907 unsigned Depth, const SimplifyQuery &Q) {
908 unsigned BitWidth = KnownLHS.getBitWidth();
909 KnownBits KnownOut(BitWidth);
910 bool IsAnd = false;
911 bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
912 Value *X = nullptr, *Y = nullptr;
913
914 switch (I->getOpcode()) {
915 case Instruction::And:
916 KnownOut = KnownLHS & KnownRHS;
917 IsAnd = true;
918 // and(x, -x) is a common idiom that clears all but the lowest set
919 // bit. If we have a single known bit in x, we can clear all bits
920 // above it.
921 // TODO: instcombine often reassociates independent `and` which can hide
922 // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
923 if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
924 // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
925 if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
926 KnownOut = KnownLHS.blsi();
927 else
928 KnownOut = KnownRHS.blsi();
929 }
930 break;
931 case Instruction::Or:
932 KnownOut = KnownLHS | KnownRHS;
933 break;
934 case Instruction::Xor:
935 KnownOut = KnownLHS ^ KnownRHS;
936 // xor(x, x-1) is a common idiom that clears all but the lowest set
937 // bit. If we have a single known bit in x, we can clear all bits
938 // above it.
939 // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
940 // -1 but for the purpose of demanded bits (xor(x, x-C) &
941 // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
942 // to use arbitrary C if xor(x, x-C) is the same as xor(x, x-1).
943 if (HasKnownOne &&
944 match(I, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())))) {
945 const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
946 KnownOut = XBits.blsmsk();
947 }
948 break;
949 default:
950 llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
951 }
952
953 // and(x, add (x, -1)) is a common idiom that always clears the low bit;
954 // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
955 // here we handle the more general case of adding any odd number by
956 // matching the form and/xor/or(x, add(x, y)) where y is odd.
957 // TODO: This could be generalized to clearing any bit set in y where the
958 // following bit is known to be unset in y.
959 if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
960 (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
961 match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
962 match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
963 KnownBits KnownY(BitWidth);
964 computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
965 if (KnownY.countMinTrailingOnes() > 0) {
966 if (IsAnd)
967 KnownOut.Zero.setBit(0);
968 else
969 KnownOut.One.setBit(0);
970 }
971 }
972 return KnownOut;
973}
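// Illustrative example for the and(x, -x) special case: if x is known to have
// bit 3 set, then x & -x isolates the lowest set bit of x, which must be at or
// below bit 3, so blsi() reports every bit above bit 3 as known zero.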
974
975static KnownBits computeKnownBitsForHorizontalOperation(
976 const Operator *I, const APInt &DemandedElts, unsigned Depth,
977 const SimplifyQuery &Q,
978 const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
979 KnownBitsFunc) {
980 APInt DemandedEltsLHS, DemandedEltsRHS;
981 getHorizDemandedEltsForFirstOperand(Q.DL.getTypeSizeInBits(I->getType()),
982 DemandedElts, DemandedEltsLHS,
983 DemandedEltsRHS);
984
985 const auto ComputeForSingleOpFunc =
986 [Depth, &Q, KnownBitsFunc](const Value *Op, APInt &DemandedEltsOp) {
987 return KnownBitsFunc(
988 computeKnownBits(Op, DemandedEltsOp, Depth + 1, Q),
989 computeKnownBits(Op, DemandedEltsOp << 1, Depth + 1, Q));
990 };
991
992 if (DemandedEltsRHS.isZero())
993 return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS);
994 if (DemandedEltsLHS.isZero())
995 return ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS);
996
997 return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS)
998 .intersectWith(ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS));
999}
1000
1001// Public so this can be used in `SimplifyDemandedUseBits`.
1002KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
1003 const KnownBits &KnownLHS,
1004 const KnownBits &KnownRHS,
1005 unsigned Depth,
1006 const SimplifyQuery &SQ) {
1007 auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
1008 APInt DemandedElts =
1009 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
1010
1011 return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
1012 SQ);
1013}
1014
1015ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
1016 Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
1017 // Without vscale_range, we only know that vscale is non-zero.
1018 if (!Attr.isValid())
1019 return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));
1020
1021 unsigned AttrMin = Attr.getVScaleRangeMin();
1022 // Minimum is larger than vscale width, result is always poison.
1023 if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
1024 return ConstantRange::getEmpty(BitWidth);
1025
1026 APInt Min(BitWidth, AttrMin);
1027 std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
1028 if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
1029 return ConstantRange::getNonEmpty(Min, APInt::getZero(BitWidth));
1030
1031 return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
1032}
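// Example: a function with vscale_range(2,4) and BitWidth = 64 yields the
// ConstantRange [2, 5), while a missing attribute yields [1, 0), i.e. "any
// non-zero value".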
1033
1034static void adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond,
1035 Value *Arm, bool Invert, unsigned Depth,
1036 const SimplifyQuery &Q) {
1037 // If we have a constant arm, we are done.
1038 if (Known.isConstant())
1039 return;
1040
1041 // See what condition implies about the bits of the select arm.
1042 KnownBits CondRes(Known.getBitWidth());
1043 computeKnownBitsFromCond(Arm, Cond, CondRes, Depth + 1, Q, Invert);
1044 // If we don't get any information from the condition, no reason to
1045 // proceed.
1046 if (CondRes.isUnknown())
1047 return;
1048
1049 // We can have a conflict if the condition is dead. I.e., if we have
1050 // (x | 64) < 32 ? (x | 64) : y
1051 // we will have a conflict at bit 6 from the condition/the `or`.
1052 // In that case just return. It's not particularly important
1053 // what we do, as this select is going to be simplified soon.
1054 CondRes = CondRes.unionWith(Known);
1055 if (CondRes.hasConflict())
1056 return;
1057
1058 // Finally make sure the information we found is valid. This is relatively
1059 // expensive so it's left for the very end.
1060 if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1))
1061 return;
1062
1063 // Finally, we know we get information from the condition and its valid,
1064 // so return it.
1065 Known = CondRes;
1066}
1067
1068static void computeKnownBitsFromOperator(const Operator *I,
1069 const APInt &DemandedElts,
1070 KnownBits &Known, unsigned Depth,
1071 const SimplifyQuery &Q) {
1072 unsigned BitWidth = Known.getBitWidth();
1073
1074 KnownBits Known2(BitWidth);
1075 switch (I->getOpcode()) {
1076 default: break;
1077 case Instruction::Load:
1078 if (MDNode *MD =
1079 Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
1080 computeKnownBitsFromRangeMetadata(*MD, Known);
1081 break;
1082 case Instruction::And:
1083 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1084 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1085
1086 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1087 break;
1088 case Instruction::Or:
1089 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1090 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1091
1092 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1093 break;
1094 case Instruction::Xor:
1095 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1096 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1097
1098 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1099 break;
1100 case Instruction::Mul: {
1101 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1102 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1103 computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, NUW,
1104 DemandedElts, Known, Known2, Depth, Q);
1105 break;
1106 }
1107 case Instruction::UDiv: {
1108 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1109 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1110 Known =
1111 KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1112 break;
1113 }
1114 case Instruction::SDiv: {
1115 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1116 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1117 Known =
1118 KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1119 break;
1120 }
1121 case Instruction::Select: {
1122 auto ComputeForArm = [&](Value *Arm, bool Invert) {
1123 KnownBits Res(Known.getBitWidth());
1124 computeKnownBits(Arm, DemandedElts, Res, Depth + 1, Q);
1125 adjustKnownBitsForSelectArm(Res, I->getOperand(0), Arm, Invert, Depth, Q);
1126 return Res;
1127 };
1128 // Only known if known in both the LHS and RHS.
1129 Known =
1130 ComputeForArm(I->getOperand(1), /*Invert=*/false)
1131 .intersectWith(ComputeForArm(I->getOperand(2), /*Invert=*/true));
1132 break;
1133 }
1134 case Instruction::FPTrunc:
1135 case Instruction::FPExt:
1136 case Instruction::FPToUI:
1137 case Instruction::FPToSI:
1138 case Instruction::SIToFP:
1139 case Instruction::UIToFP:
1140 break; // Can't work with floating point.
1141 case Instruction::PtrToInt:
1142 case Instruction::IntToPtr:
1143 // Fall through and handle them the same as zext/trunc.
1144 [[fallthrough]];
1145 case Instruction::ZExt:
1146 case Instruction::Trunc: {
1147 Type *SrcTy = I->getOperand(0)->getType();
1148
1149 unsigned SrcBitWidth;
1150 // Note that we handle pointer operands here because of inttoptr/ptrtoint
1151 // which fall through here.
1152 Type *ScalarTy = SrcTy->getScalarType();
1153 SrcBitWidth = ScalarTy->isPointerTy() ?
1154 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
1155 Q.DL.getTypeSizeInBits(ScalarTy);
1156
1157 assert(SrcBitWidth && "SrcBitWidth can't be zero");
1158 Known = Known.anyextOrTrunc(SrcBitWidth);
1159 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1160 if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
1161 Inst && Inst->hasNonNeg() && !Known.isNegative())
1162 Known.makeNonNegative();
1163 Known = Known.zextOrTrunc(BitWidth);
1164 break;
1165 }
1166 case Instruction::BitCast: {
1167 Type *SrcTy = I->getOperand(0)->getType();
1168 if (SrcTy->isIntOrPtrTy() &&
1169 // TODO: For now, not handling conversions like:
1170 // (bitcast i64 %x to <2 x i32>)
1171 !I->getType()->isVectorTy()) {
1172 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1173 break;
1174 }
1175
1176 const Value *V;
1177 // Handle bitcast from floating point to integer.
1178 if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
1179 V->getType()->isFPOrFPVectorTy()) {
1180 Type *FPType = V->getType()->getScalarType();
1181 KnownFPClass Result =
1182 computeKnownFPClass(V, DemandedElts, fcAllFlags, Depth + 1, Q);
1183 FPClassTest FPClasses = Result.KnownFPClasses;
1184
1185 // TODO: Treat it as zero/poison if the use of I is unreachable.
1186 if (FPClasses == fcNone)
1187 break;
1188
1189 if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
1190 Known.Zero.setAllBits();
1191 Known.One.setAllBits();
1192
1193 if (FPClasses & fcInf)
1194 Known = Known.intersectWith(KnownBits::makeConstant(
1195 APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
1196
1197 if (FPClasses & fcZero)
1198 Known = Known.intersectWith(KnownBits::makeConstant(
1199 APInt::getZero(FPType->getScalarSizeInBits())));
1200
1201 Known.Zero.clearSignBit();
1202 Known.One.clearSignBit();
1203 }
1204
1205 if (Result.SignBit) {
1206 if (*Result.SignBit)
1207 Known.makeNegative();
1208 else
1209 Known.makeNonNegative();
1210 }
1211
1212 break;
1213 }
1214
1215 // Handle cast from vector integer type to scalar or vector integer.
1216 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
1217 if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
1218 !I->getType()->isIntOrIntVectorTy() ||
1219 isa<ScalableVectorType>(I->getType()))
1220 break;
1221
1222 // Look through a cast from narrow vector elements to wider type.
1223 // Examples: v4i32 -> v2i64, v3i8 -> v24
1224 unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
1225 if (BitWidth % SubBitWidth == 0) {
1226 // Known bits are automatically intersected across demanded elements of a
1227 // vector. So for example, if a bit is computed as known zero, it must be
1228 // zero across all demanded elements of the vector.
1229 //
1230 // For this bitcast, each demanded element of the output is sub-divided
1231 // across a set of smaller vector elements in the source vector. To get
1232 // the known bits for an entire element of the output, compute the known
1233 // bits for each sub-element sequentially. This is done by shifting the
1234 // one-set-bit demanded elements parameter across the sub-elements for
1235 // consecutive calls to computeKnownBits. We are using the demanded
1236 // elements parameter as a mask operator.
1237 //
1238 // The known bits of each sub-element are then inserted into place
1239 // (dependent on endian) to form the full result of known bits.
1240 unsigned NumElts = DemandedElts.getBitWidth();
1241 unsigned SubScale = BitWidth / SubBitWidth;
1242 APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
1243 for (unsigned i = 0; i != NumElts; ++i) {
1244 if (DemandedElts[i])
1245 SubDemandedElts.setBit(i * SubScale);
1246 }
1247
1248 KnownBits KnownSrc(SubBitWidth);
1249 for (unsigned i = 0; i != SubScale; ++i) {
1250 computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
1251 Depth + 1, Q);
1252 unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1253 Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
1254 }
1255 }
1256 break;
1257 }
1258 case Instruction::SExt: {
1259 // Compute the bits in the result that are not present in the input.
1260 unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1261
1262 Known = Known.trunc(SrcBitWidth);
1263 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1264 // If the sign bit of the input is known set or clear, then we know the
1265 // top bits of the result.
1266 Known = Known.sext(BitWidth);
1267 break;
1268 }
1269 case Instruction::Shl: {
1270 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1271 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1272 auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1273 bool ShAmtNonZero) {
1274 return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
1275 };
1276 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1277 KF);
1278 // Trailing zeros of a left-shifted constant never decrease.
1279 const APInt *C;
1280 if (match(I->getOperand(0), m_APInt(C)))
1281 Known.Zero.setLowBits(C->countr_zero());
1282 break;
1283 }
1284 case Instruction::LShr: {
1285 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1286 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1287 bool ShAmtNonZero) {
1288 return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1289 };
1290 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1291 KF);
1292 // Leading zeros of a right-shifted constant never decrease.
1293 const APInt *C;
1294 if (match(I->getOperand(0), m_APInt(C)))
1295 Known.Zero.setHighBits(C->countl_zero());
1296 break;
1297 }
1298 case Instruction::AShr: {
1299 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1300 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1301 bool ShAmtNonZero) {
1302 return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1303 };
1304 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1305 KF);
1306 break;
1307 }
1308 case Instruction::Sub: {
1309 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1310 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1311 computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, NUW,
1312 DemandedElts, Known, Known2, Depth, Q);
1313 break;
1314 }
1315 case Instruction::Add: {
1316 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1317 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1318 computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, NUW,
1319 DemandedElts, Known, Known2, Depth, Q);
1320 break;
1321 }
1322 case Instruction::SRem:
1323 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1324 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1325 Known = KnownBits::srem(Known, Known2);
1326 break;
1327
1328 case Instruction::URem:
1329 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1330 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1331 Known = KnownBits::urem(Known, Known2);
1332 break;
1333 case Instruction::Alloca:
1334 Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
1335 break;
1336 case Instruction::GetElementPtr: {
1337 // Analyze all of the subscripts of this getelementptr instruction
1338 // to determine if we can prove known low zero bits.
1339 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1340 // Accumulate the constant indices in a separate variable
1341 // to minimize the number of calls to computeForAddSub.
1342 APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
1343
1344 gep_type_iterator GTI = gep_type_begin(I);
1345 for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1346 // TrailZ can only become smaller, short-circuit if we hit zero.
1347 if (Known.isUnknown())
1348 break;
1349
1350 Value *Index = I->getOperand(i);
1351
1352 // Handle case when index is zero.
1353 Constant *CIndex = dyn_cast<Constant>(Index);
1354 if (CIndex && CIndex->isZeroValue())
1355 continue;
1356
1357 if (StructType *STy = GTI.getStructTypeOrNull()) {
1358 // Handle struct member offset arithmetic.
1359
1360 assert(CIndex &&
1361 "Access to structure field must be known at compile time");
1362
1363 if (CIndex->getType()->isVectorTy())
1364 Index = CIndex->getSplatValue();
1365
1366 unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1367 const StructLayout *SL = Q.DL.getStructLayout(STy);
1368 uint64_t Offset = SL->getElementOffset(Idx);
1369 AccConstIndices += Offset;
1370 continue;
1371 }
1372
1373 // Handle array index arithmetic.
1374 Type *IndexedTy = GTI.getIndexedType();
1375 if (!IndexedTy->isSized()) {
1376 Known.resetAll();
1377 break;
1378 }
1379
1380 unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
1381 KnownBits IndexBits(IndexBitWidth);
1382 computeKnownBits(Index, IndexBits, Depth + 1, Q);
1383 TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
1384 uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
1385 KnownBits ScalingFactor(IndexBitWidth);
1386 // Multiply by current sizeof type.
1387 // &A[i] == A + i * sizeof(*A[i]).
1388 if (IndexTypeSize.isScalable()) {
1389 // For scalable types the only thing we know about sizeof is
1390 // that this is a multiple of the minimum size.
1391 ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
1392 } else if (IndexBits.isConstant()) {
1393 APInt IndexConst = IndexBits.getConstant();
1394 APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
1395 IndexConst *= ScalingFactor;
1396 AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
1397 continue;
1398 } else {
1399 ScalingFactor =
1400 KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
1401 }
1402 IndexBits = KnownBits::mul(IndexBits, ScalingFactor);
1403
1404 // If the offsets have a different width from the pointer, according
1405 // to the language reference we need to sign-extend or truncate them
1406 // to the width of the pointer.
1407 IndexBits = IndexBits.sextOrTrunc(BitWidth);
1408
1409 // Note that inbounds does *not* guarantee nsw for the addition, as only
1410 // the offset is signed, while the base address is unsigned.
1411 Known = KnownBits::add(Known, IndexBits);
1412 }
1413 if (!Known.isUnknown() && !AccConstIndices.isZero()) {
1414 KnownBits Index = KnownBits::makeConstant(AccConstIndices);
1415 Known = KnownBits::add(Known, Index);
1416 }
1417 break;
1418 }
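// Illustrative example for the GEP case above: for
//   %q = getelementptr inbounds i32, ptr %p, i64 %i
// with %p known to be 16-byte aligned, the base contributes four known-zero low
// bits and the index is scaled by sizeof(i32) = 4, so the result still has its
// low two bits known zero.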
1419 case Instruction::PHI: {
1420 const PHINode *P = cast<PHINode>(I);
1421 BinaryOperator *BO = nullptr;
1422 Value *R = nullptr, *L = nullptr;
1423 if (matchSimpleRecurrence(P, BO, R, L)) {
1424 // Handle the case of a simple two-predecessor recurrence PHI.
1425 // There's a lot more that could theoretically be done here, but
1426 // this is sufficient to catch some interesting cases.
1427 unsigned Opcode = BO->getOpcode();
1428
1429 switch (Opcode) {
1430 // If this is a shift recurrence, we know the bits being shifted in. We
1431 // can combine that with information about the start value of the
1432 // recurrence to conclude facts about the result. If this is a udiv
1433 // recurrence, we know that the result can never exceed either the
1434 // numerator or the start value, whichever is greater.
1435 case Instruction::LShr:
1436 case Instruction::AShr:
1437 case Instruction::Shl:
1438 case Instruction::UDiv:
1439 if (BO->getOperand(0) != I)
1440 break;
1441 [[fallthrough]];
1442
1443 // For a urem recurrence, the result can never exceed the start value. The
1444 // phi could either be the numerator or the denominator.
1445 case Instruction::URem: {
1446 // We have matched a recurrence of the form:
1447 // %iv = [R, %entry], [%iv.next, %backedge]
1448 // %iv.next = shift_op %iv, L
1449
1450 // Recurse with the phi context to avoid concern about whether facts
1451 // inferred hold at original context instruction. TODO: It may be
1452 // correct to use the original context. IF warranted, explore and
1453 // add sufficient tests to cover.
1454 SimplifyQuery RecQ = Q.getWithoutCondContext();
1455 RecQ.CxtI = P;
1456 computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1457 switch (Opcode) {
1458 case Instruction::Shl:
1459 // A shl recurrence will only increase the trailing zeros
1460 Known.Zero.setLowBits(Known2.countMinTrailingZeros());
1461 break;
1462 case Instruction::LShr:
1463 case Instruction::UDiv:
1464 case Instruction::URem:
1465 // lshr, udiv, and urem recurrences will preserve the leading zeros of
1466 // the start value.
1467 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1468 break;
1469 case Instruction::AShr:
1470 // An ashr recurrence will extend the initial sign bit
1471 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1472 Known.One.setHighBits(Known2.countMinLeadingOnes());
1473 break;
1474 }
1475 break;
1476 }
1477
1478 // Check for operations that have the property that if
1479 // both their operands have low zero bits, the result
1480 // will have low zero bits.
1481 case Instruction::Add:
1482 case Instruction::Sub:
1483 case Instruction::And:
1484 case Instruction::Or:
1485 case Instruction::Mul: {
1486 // Change the context instruction to the "edge" that flows into the
1487 // phi. This is important because that is where the value is actually
1488 // "evaluated" even though it is used later somewhere else. (see also
1489 // D69571).
1490 SimplifyQuery RecQ = Q.getWithoutCondContext();
1491
1492 unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
1493 Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
1494 Instruction *LInst = P->getIncomingBlock(1 - OpNum)->getTerminator();
1495
1496 // Ok, we have a PHI of the form L op= R. Check for low
1497 // zero bits.
1498 RecQ.CxtI = RInst;
1499 computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1500
1501 // We need to take the minimum number of known bits
1502 KnownBits Known3(BitWidth);
1503 RecQ.CxtI = LInst;
1504 computeKnownBits(L, DemandedElts, Known3, Depth + 1, RecQ);
1505
1506 Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
1507 Known3.countMinTrailingZeros()));
1508
1509 auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
1510 if (!OverflowOp || !Q.IIQ.hasNoSignedWrap(OverflowOp))
1511 break;
1512
1513 switch (Opcode) {
1514 // If initial value of recurrence is nonnegative, and we are adding
1515 // a nonnegative number with nsw, the result can only be nonnegative
1516 // or poison value regardless of the number of times we execute the
1517 // add in phi recurrence. If initial value is negative and we are
1518 // adding a negative number with nsw, the result can only be
1519 // negative or poison value. Similar arguments apply to sub and mul.
1520 //
1521 // (add non-negative, non-negative) --> non-negative
1522 // (add negative, negative) --> negative
1523 case Instruction::Add: {
1524 if (Known2.isNonNegative() && Known3.isNonNegative())
1525 Known.makeNonNegative();
1526 else if (Known2.isNegative() && Known3.isNegative())
1527 Known.makeNegative();
1528 break;
1529 }
1530
1531 // (sub nsw non-negative, negative) --> non-negative
1532 // (sub nsw negative, non-negative) --> negative
1533 case Instruction::Sub: {
1534 if (BO->getOperand(0) != I)
1535 break;
1536 if (Known2.isNonNegative() && Known3.isNegative())
1537 Known.makeNonNegative();
1538 else if (Known2.isNegative() && Known3.isNonNegative())
1539 Known.makeNegative();
1540 break;
1541 }
1542
1543 // (mul nsw non-negative, non-negative) --> non-negative
1544 case Instruction::Mul:
1545 if (Known2.isNonNegative() && Known3.isNonNegative())
1546 Known.makeNonNegative();
1547 break;
1548
1549 default:
1550 break;
1551 }
1552 break;
1553 }
1554
1555 default:
1556 break;
1557 }
1558 }
1559
1560 // Unreachable blocks may have zero-operand PHI nodes.
1561 if (P->getNumIncomingValues() == 0)
1562 break;
1563
1564 // Otherwise take the unions of the known bit sets of the operands,
1565 // taking conservative care to avoid excessive recursion.
1566 if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
1567 // Skip if every incoming value refers back to this phi.
1568 if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
1569 break;
1570
1571 Known.Zero.setAllBits();
1572 Known.One.setAllBits();
1573 for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
1574 Value *IncValue = P->getIncomingValue(u);
1575 // Skip direct self references.
1576 if (IncValue == P) continue;
1577
1578 // If the Use is a select of this phi, use the known bits of the other
1579 // operand to break the recursion.
1580 if (auto *SI = dyn_cast<SelectInst>(IncValue)) {
1581 if (SI->getTrueValue() == P || SI->getFalseValue() == P)
1582 IncValue = SI->getTrueValue() == P ? SI->getFalseValue()
1583 : SI->getTrueValue();
1584 }
1585
1586 // Change the context instruction to the "edge" that flows into the
1587 // phi. This is important because that is where the value is actually
1588 // "evaluated" even though it is used later somewhere else. (see also
1589 // D69571).
1590 SimplifyQuery RecQ = Q.getWithoutCondContext();
1591 RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
1592
1593 Known2 = KnownBits(BitWidth);
1594
1595 // Recurse, but cap the recursion to one level, because we don't
1596 // want to waste time spinning around in loops.
1597 // TODO: See if we can base recursion limiter on number of incoming phi
1598 // edges so we don't overly clamp analysis.
1599 computeKnownBits(IncValue, DemandedElts, Known2,
1600 MaxAnalysisRecursionDepth - 1, RecQ);
1601
1602 // See if we can further use a conditional branch into the phi
1603 // to help us determine the range of the value.
1604 if (!Known2.isConstant()) {
1605 CmpPredicate Pred;
1606 const APInt *RHSC;
1607 BasicBlock *TrueSucc, *FalseSucc;
1608 // TODO: Use RHS Value and compute range from its known bits.
1609 if (match(RecQ.CxtI,
1610 m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
1611 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
1612 // Check for cases of duplicate successors.
1613 if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
1614 // If we're using the false successor, invert the predicate.
1615 if (FalseSucc == P->getParent())
1616 Pred = CmpInst::getInversePredicate(Pred);
1617 // Get the knownbits implied by the incoming phi condition.
1618 auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
1619 KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
1620 // We can have conflicts here if we are analyzing dead code (it's
1621 // impossible for us to reach this BB based on the icmp).
1622 if (KnownUnion.hasConflict()) {
1623 // No reason to continue analyzing in a known dead region, so
1624 // just resetAll and break. This will cause us to also exit the
1625 // outer loop.
1626 Known.resetAll();
1627 break;
1628 }
1629 Known2 = KnownUnion;
1630 }
1631 }
1632 }
1633
1634 Known = Known.intersectWith(Known2);
1635 // If all bits have been ruled out, there's no need to check
1636 // more operands.
1637 if (Known.isUnknown())
1638 break;
1639 }
1640 }
1641 break;
1642 }
1643 case Instruction::Call:
1644 case Instruction::Invoke: {
1645 // If range metadata is attached to this call, set known bits from that,
1646 // and then intersect with known bits based on other properties of the
1647 // function.
1648 if (MDNode *MD =
1649 Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
1650 computeKnownBitsFromRangeMetadata(*MD, Known);
1651
1652 const auto *CB = cast<CallBase>(I);
1653
1654 if (std::optional<ConstantRange> Range = CB->getRange())
1655 Known = Known.unionWith(Range->toKnownBits());
1656
1657 if (const Value *RV = CB->getReturnedArgOperand()) {
1658 if (RV->getType() == I->getType()) {
1659 computeKnownBits(RV, Known2, Depth + 1, Q);
1660 Known = Known.unionWith(Known2);
1661 // If the function doesn't return properly for all input values
1662 // (e.g. unreachable exits) then there might be conflicts between the
1663 // argument value and the range metadata. Simply discard the known bits
1664 // in case of conflicts.
1665 if (Known.hasConflict())
1666 Known.resetAll();
1667 }
1668 }
1669 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1670 switch (II->getIntrinsicID()) {
1671 default:
1672 break;
1673 case Intrinsic::abs: {
1674 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1675 bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
1676 Known = Known2.abs(IntMinIsPoison);
1677 break;
1678 }
1679 case Intrinsic::bitreverse:
1680 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1681 Known.Zero |= Known2.Zero.reverseBits();
1682 Known.One |= Known2.One.reverseBits();
1683 break;
1684 case Intrinsic::bswap:
1685 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1686 Known.Zero |= Known2.Zero.byteSwap();
1687 Known.One |= Known2.One.byteSwap();
1688 break;
1689 case Intrinsic::ctlz: {
1690 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1691 // If we have a known 1, its position is our upper bound.
1692 unsigned PossibleLZ = Known2.countMaxLeadingZeros();
1693 // If this call is poison for 0 input, the result will be less than 2^n.
1694 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1695 PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1696 unsigned LowBits = llvm::bit_width(PossibleLZ);
1697 Known.Zero.setBitsFrom(LowBits);
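// Worked example: for an i32 operand with a known one at bit 28, there can
// be at most 3 leading zeros, so the ctlz result is at most 3 and fits in
// bit_width(3) == 2 bits; bits [2, 32) of the result are known zero.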
1698 break;
1699 }
1700 case Intrinsic::cttz: {
1701 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1702 // If we have a known 1, its position is our upper bound.
1703 unsigned PossibleTZ = Known2.countMaxTrailingZeros();
1704 // If this call is poison for 0 input, the result will be less than 2^n.
1705 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1706 PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1707 unsigned LowBits = llvm::bit_width(PossibleTZ);
1708 Known.Zero.setBitsFrom(LowBits);
1709 break;
1710 }
1711 case Intrinsic::ctpop: {
1712 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1713 // We can bound the space the count needs. Also, bits known to be zero
1714 // can't contribute to the population.
1715 unsigned BitsPossiblySet = Known2.countMaxPopulation();
1716 unsigned LowBits = llvm::bit_width(BitsPossiblySet);
1717 Known.Zero.setBitsFrom(LowBits);
1718 // TODO: we could bound KnownOne using the lower bound on the number
1719 // of bits which might be set provided by popcnt KnownOne2.
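// Worked example: for an i8 operand with at most 5 bits possibly set, the
// population count is at most 5, which fits in bit_width(5) == 3 bits, so
// bits [3, 8) of the result are known zero.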
1720 break;
1721 }
1722 case Intrinsic::fshr:
1723 case Intrinsic::fshl: {
1724 const APInt *SA;
1725 if (!match(I->getOperand(2), m_APInt(SA)))
1726 break;
1727
1728 // Normalize to funnel shift left.
1729 uint64_t ShiftAmt = SA->urem(BitWidth);
1730 if (II->getIntrinsicID() == Intrinsic::fshr)
1731 ShiftAmt = BitWidth - ShiftAmt;
1732
1733 KnownBits Known3(BitWidth);
1734 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1735 computeKnownBits(I->getOperand(1), DemandedElts, Known3, Depth + 1, Q);
1736
1737 Known.Zero =
1738 Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
1739 Known.One =
1740 Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
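// Worked example: fshl(X, Y, 8) on i32 computes (X << 8) | (Y >> 24), so the
// result's known bits are X's known bits shifted left by 8 combined with Y's
// known bits shifted right by 24, exactly as above.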
1741 break;
1742 }
1743 case Intrinsic::uadd_sat:
1744 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1745 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1746 Known = KnownBits::uadd_sat(Known, Known2);
1747 break;
1748 case Intrinsic::usub_sat:
1749 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1750 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1751 Known = KnownBits::usub_sat(Known, Known2);
1752 break;
1753 case Intrinsic::sadd_sat:
1754 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1755 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1756 Known = KnownBits::sadd_sat(Known, Known2);
1757 break;
1758 case Intrinsic::ssub_sat:
1759 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1760 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1761 Known = KnownBits::ssub_sat(Known, Known2);
1762 break;
1763 // Vec reverse preserves bits from input vec.
1764 case Intrinsic::vector_reverse:
1765 computeKnownBits(I->getOperand(0), DemandedElts.reverseBits(), Known,
1766 Depth + 1, Q);
1767 break;
1768 // For min/max/and/or reduce, any bit common to each element in the
1769 // input vec is set in the output.
1770 case Intrinsic::vector_reduce_and:
1771 case Intrinsic::vector_reduce_or:
1772 case Intrinsic::vector_reduce_umax:
1773 case Intrinsic::vector_reduce_umin:
1774 case Intrinsic::vector_reduce_smax:
1775 case Intrinsic::vector_reduce_smin:
1776 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1777 break;
1778 case Intrinsic::vector_reduce_xor: {
1779 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1780 // The zeros common to all vecs are zero in the output.
1781 // If the number of elements is odd, then the common ones remain. If the
1782 // number of elements is even, then the common ones become zeros.
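// Worked example: xor-reducing a <4 x i8> whose elements all have bit 0 set
// gives 1 ^ 1 ^ 1 ^ 1 == 0 in that bit, so with an even (or scalable)
// element count only the common zeros survive.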
1783 auto *VecTy = cast<VectorType>(I->getOperand(0)->getType());
1784 // Even, so the ones become zeros.
1785 bool EvenCnt = VecTy->getElementCount().isKnownEven();
1786 if (EvenCnt)
1787 Known.Zero |= Known.One;
1788 // The element count might be even (e.g. scalable vectors), so clear the ones.
1789 if (VecTy->isScalableTy() || EvenCnt)
1790 Known.One.clearAllBits();
1791 break;
1792 }
1793 case Intrinsic::umin:
1794 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1795 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1796 Known = KnownBits::umin(Known, Known2);
1797 break;
1798 case Intrinsic::umax:
1799 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1800 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1801 Known = KnownBits::umax(Known, Known2);
1802 break;
1803 case Intrinsic::smin:
1804 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1805 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1806 Known = KnownBits::smin(Known, Known2);
1807 break;
1808 case Intrinsic::smax:
1809 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1810 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1811 Known = KnownBits::smax(Known, Known2);
1812 break;
1813 case Intrinsic::ptrmask: {
1814 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1815
1816 const Value *Mask = I->getOperand(1);
1817 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
1818 computeKnownBits(Mask, DemandedElts, Known2, Depth + 1, Q);
1819 // TODO: 1-extend would be more precise.
1820 Known &= Known2.anyextOrTrunc(BitWidth);
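// Worked example: masking a pointer with a mask whose low 4 bits are known
// zero forces the low 4 bits of the result to zero, i.e. the result is known
// to be at least 16-byte aligned.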
1821 break;
1822 }
1823 case Intrinsic::x86_sse2_pmulh_w:
1824 case Intrinsic::x86_avx2_pmulh_w:
1825 case Intrinsic::x86_avx512_pmulh_w_512:
1826 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1827 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1828 Known = KnownBits::mulhs(Known, Known2);
1829 break;
1830 case Intrinsic::x86_sse2_pmulhu_w:
1831 case Intrinsic::x86_avx2_pmulhu_w:
1832 case Intrinsic::x86_avx512_pmulhu_w_512:
1833 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1834 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1835 Known = KnownBits::mulhu(Known, Known2);
1836 break;
1837 case Intrinsic::x86_sse42_crc32_64_64:
1838 Known.Zero.setBitsFrom(32);
1839 break;
1840 case Intrinsic::x86_ssse3_phadd_d_128:
1841 case Intrinsic::x86_ssse3_phadd_w_128:
1842 case Intrinsic::x86_avx2_phadd_d:
1843 case Intrinsic::x86_avx2_phadd_w: {
1844 Known = computeKnownBitsForHorizontalOperation(
1845 I, DemandedElts, Depth, Q,
1846 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1847 return KnownBits::add(KnownLHS, KnownRHS);
1848 });
1849 break;
1850 }
1851 case Intrinsic::x86_ssse3_phadd_sw_128:
1852 case Intrinsic::x86_avx2_phadd_sw: {
1853 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
1854 Q, KnownBits::sadd_sat);
1855 break;
1856 }
1857 case Intrinsic::x86_ssse3_phsub_d_128:
1858 case Intrinsic::x86_ssse3_phsub_w_128:
1859 case Intrinsic::x86_avx2_phsub_d:
1860 case Intrinsic::x86_avx2_phsub_w: {
1861 Known = computeKnownBitsForHorizontalOperation(
1862 I, DemandedElts, Depth, Q,
1863 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1864 return KnownBits::sub(KnownLHS, KnownRHS);
1865 });
1866 break;
1867 }
1868 case Intrinsic::x86_ssse3_phsub_sw_128:
1869 case Intrinsic::x86_avx2_phsub_sw: {
1870 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
1871 Q, KnownBits::ssub_sat);
1872 break;
1873 }
1874 case Intrinsic::riscv_vsetvli:
1875 case Intrinsic::riscv_vsetvlimax: {
1876 bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
1877 const ConstantRange Range = getVScaleRange(II->getFunction(), BitWidth);
1878 uint64_t SEW = RISCVVType::decodeVSEW(
1879 cast<ConstantInt>(II->getArgOperand(HasAVL))->getZExtValue());
1880 RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(
1881 cast<ConstantInt>(II->getArgOperand(1 + HasAVL))->getZExtValue());
1882 uint64_t MaxVLEN =
1883 Range.getUnsignedMax().getZExtValue() * RISCV::RVVBitsPerBlock;
1884 uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMUL);
1885
1886 // The result of vsetvli must not be larger than AVL.
1887 if (HasAVL)
1888 if (auto *CI = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1889 MaxVL = std::min(MaxVL, CI->getZExtValue());
1890
1891 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
1892 if (BitWidth > KnownZeroFirstBit)
1893 Known.Zero.setBitsFrom(KnownZeroFirstBit);
1894 break;
1895 }
1896 case Intrinsic::vscale: {
1897 if (!II->getParent() || !II->getFunction())
1898 break;
1899
1900 Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits();
1901 break;
1902 }
1903 }
1904 }
1905 break;
1906 }
1907 case Instruction::ShuffleVector: {
1908 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
1909 // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
1910 if (!Shuf) {
1911 Known.resetAll();
1912 return;
1913 }
1914 // For undef elements, we don't know anything about the common state of
1915 // the shuffle result.
1916 APInt DemandedLHS, DemandedRHS;
1917 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
1918 Known.resetAll();
1919 return;
1920 }
1921 Known.One.setAllBits();
1922 Known.Zero.setAllBits();
1923 if (!!DemandedLHS) {
1924 const Value *LHS = Shuf->getOperand(0);
1925 computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
1926 // If we don't know any bits, early out.
1927 if (Known.isUnknown())
1928 break;
1929 }
1930 if (!!DemandedRHS) {
1931 const Value *RHS = Shuf->getOperand(1);
1932 computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
1933 Known = Known.intersectWith(Known2);
1934 }
1935 break;
1936 }
1937 case Instruction::InsertElement: {
1938 if (isa<ScalableVectorType>(I->getType())) {
1939 Known.resetAll();
1940 return;
1941 }
1942 const Value *Vec = I->getOperand(0);
1943 const Value *Elt = I->getOperand(1);
1944 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
1945 unsigned NumElts = DemandedElts.getBitWidth();
1946 APInt DemandedVecElts = DemandedElts;
1947 bool NeedsElt = true;
1948 // If we know the index we are inserting to, clear it from the Vec check.
1949 if (CIdx && CIdx->getValue().ult(NumElts)) {
1950 DemandedVecElts.clearBit(CIdx->getZExtValue());
1951 NeedsElt = DemandedElts[CIdx->getZExtValue()];
1952 }
1953
1954 Known.One.setAllBits();
1955 Known.Zero.setAllBits();
1956 if (NeedsElt) {
1957 computeKnownBits(Elt, Known, Depth + 1, Q);
1958 // If we don't know any bits, early out.
1959 if (Known.isUnknown())
1960 break;
1961 }
1962
1963 if (!DemandedVecElts.isZero()) {
1964 computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
1965 Known = Known.intersectWith(Known2);
1966 }
1967 break;
1968 }
1969 case Instruction::ExtractElement: {
1970 // Look through extract element. If the index is non-constant or
1971 // out-of-range demand all elements, otherwise just the extracted element.
1972 const Value *Vec = I->getOperand(0);
1973 const Value *Idx = I->getOperand(1);
1974 auto *CIdx = dyn_cast<ConstantInt>(Idx);
1975 if (isa<ScalableVectorType>(Vec->getType())) {
1976 // FIXME: there's probably *something* we can do with scalable vectors
1977 Known.resetAll();
1978 break;
1979 }
1980 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1981 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
1982 if (CIdx && CIdx->getValue().ult(NumElts))
1983 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
1984 computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
1985 break;
1986 }
1987 case Instruction::ExtractValue:
1988 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
1989 const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
1990 if (EVI->getNumIndices() != 1) break;
1991 if (EVI->getIndices()[0] == 0) {
1992 switch (II->getIntrinsicID()) {
1993 default: break;
1994 case Intrinsic::uadd_with_overflow:
1995 case Intrinsic::sadd_with_overflow:
1996 computeKnownBitsAddSub(
1997 true, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
1998 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
1999 break;
2000 case Intrinsic::usub_with_overflow:
2001 case Intrinsic::ssub_with_overflow:
2002 computeKnownBitsAddSub(
2003 false, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
2004 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
2005 break;
2006 case Intrinsic::umul_with_overflow:
2007 case Intrinsic::smul_with_overflow:
2008 computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
2009 false, DemandedElts, Known, Known2, Depth, Q);
2010 break;
2011 }
2012 }
2013 }
2014 break;
2015 case Instruction::Freeze:
2016 if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
2017 Depth + 1))
2018 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
2019 break;
2020 }
2021}
2022
2023/// Determine which bits of V are known to be either zero or one and return
2024/// them.
2025KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
2026 unsigned Depth, const SimplifyQuery &Q) {
2027 KnownBits Known(getBitWidth(V->getType(), Q.DL));
2028 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
2029 return Known;
2030}
2031
2032/// Determine which bits of V are known to be either zero or one and return
2033/// them.
2034 KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth,
2035 const SimplifyQuery &Q) {
2036 KnownBits Known(getBitWidth(V->getType(), Q.DL));
2037 computeKnownBits(V, Known, Depth, Q);
2038 return Known;
2039}
2040
2041/// Determine which bits of V are known to be either zero or one and return
2042/// them in the Known bit set.
2043///
2044/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
2045/// we cannot optimize based on the assumption that it is zero without changing
2046/// it to be an explicit zero. If we don't change it to zero, other code could
2047 /// be optimized based on the contradictory assumption that it is non-zero.
2048/// Because instcombine aggressively folds operations with undef args anyway,
2049/// this won't lose us code quality.
2050///
2051/// This function is defined on values with integer type, values with pointer
2052/// type, and vectors of integers. In the case
2053 /// where V is a vector, the known zero and known one values are the
2054 /// same width as the vector element, and a bit is set only if it is true
2055/// for all of the demanded elements in the vector specified by DemandedElts.
2056void computeKnownBits(const Value *V, const APInt &DemandedElts,
2057 KnownBits &Known, unsigned Depth,
2058 const SimplifyQuery &Q) {
2059 if (!DemandedElts) {
2060 // No demanded elts, better to assume we don't know anything.
2061 Known.resetAll();
2062 return;
2063 }
2064
2065 assert(V && "No Value?");
2066 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2067
2068#ifndef NDEBUG
2069 Type *Ty = V->getType();
2070 unsigned BitWidth = Known.getBitWidth();
2071
2073 "Not integer or pointer type!");
2074
2075 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
2076 assert(
2077 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
2078 "DemandedElt width should equal the fixed vector number of elements");
2079 } else {
2080 assert(DemandedElts == APInt(1, 1) &&
2081 "DemandedElt width should be 1 for scalars or scalable vectors");
2082 }
2083
2084 Type *ScalarTy = Ty->getScalarType();
2085 if (ScalarTy->isPointerTy()) {
2086 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
2087 "V and Known should have same BitWidth");
2088 } else {
2089 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
2090 "V and Known should have same BitWidth");
2091 }
2092#endif
2093
2094 const APInt *C;
2095 if (match(V, m_APInt(C))) {
2096 // We know all of the bits for a scalar constant or a splat vector constant!
2097 Known = KnownBits::makeConstant(*C);
2098 return;
2099 }
2100 // Null and aggregate-zero are all-zeros.
2101 if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
2102 Known.setAllZero();
2103 return;
2104 }
2105 // Handle a constant vector by taking the intersection of the known bits of
2106 // each element.
2107 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
2108 assert(!isa<ScalableVectorType>(V->getType()));
2109 // We know that CDV must be a vector of integers. Take the intersection of
2110 // each element.
2111 Known.Zero.setAllBits(); Known.One.setAllBits();
2112 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
2113 if (!DemandedElts[i])
2114 continue;
2115 APInt Elt = CDV->getElementAsAPInt(i);
2116 Known.Zero &= ~Elt;
2117 Known.One &= Elt;
2118 }
2119 if (Known.hasConflict())
2120 Known.resetAll();
2121 return;
2122 }
2123
2124 if (const auto *CV = dyn_cast<ConstantVector>(V)) {
2125 assert(!isa<ScalableVectorType>(V->getType()));
2126 // We know that CV must be a vector of integers. Take the intersection of
2127 // each element.
2128 Known.Zero.setAllBits(); Known.One.setAllBits();
2129 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
2130 if (!DemandedElts[i])
2131 continue;
2132 Constant *Element = CV->getAggregateElement(i);
2133 if (isa<PoisonValue>(Element))
2134 continue;
2135 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
2136 if (!ElementCI) {
2137 Known.resetAll();
2138 return;
2139 }
2140 const APInt &Elt = ElementCI->getValue();
2141 Known.Zero &= ~Elt;
2142 Known.One &= Elt;
2143 }
2144 if (Known.hasConflict())
2145 Known.resetAll();
2146 return;
2147 }
2148
2149 // Start out not knowing anything.
2150 Known.resetAll();
2151
2152 // We can't imply anything about undefs.
2153 if (isa<UndefValue>(V))
2154 return;
2155
2156 // There's no point in looking through other users of ConstantData for
2157 // assumptions. Confirm that we've handled them all.
2158 assert(!isa<ConstantData>(V) && "Unhandled constant data!");
2159
2160 if (const auto *A = dyn_cast<Argument>(V))
2161 if (std::optional<ConstantRange> Range = A->getRange())
2162 Known = Range->toKnownBits();
2163
2164 // All recursive calls that increase depth must come after this.
2165 if (Depth == MaxAnalysisRecursionDepth)
2166 return;
2167
2168 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
2169 // the bits of its aliasee.
2170 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2171 if (!GA->isInterposable())
2172 computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
2173 return;
2174 }
2175
2176 if (const Operator *I = dyn_cast<Operator>(V))
2177 computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
2178 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
2179 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
2180 Known = CR->toKnownBits();
2181 }
2182
2183 // Aligned pointers have trailing zeros - refine Known.Zero set
2184 if (isa<PointerType>(V->getType())) {
2185 Align Alignment = V->getPointerAlignment(Q.DL);
2186 Known.Zero.setLowBits(Log2(Alignment));
2187 }
2188
2189 // computeKnownBitsFromContext strictly refines Known.
2190 // Therefore, we run it after computeKnownBitsFromOperator.
2191
2192 // Check whether we can determine known bits from context such as assumes.
2193 computeKnownBitsFromContext(V, Known, Depth, Q);
2194}
2195
2196 /// Try to detect a recurrence in which the value of the induction variable is
2197/// always a power of two (or zero).
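/// For example, in "for (unsigned i = 1; ...; i *= 2)" the induction PHI
/// starts at a power of two and is multiplied by a power of two on every
/// iteration, so it remains a power of two (or wraps to zero).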
2198static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
2199 unsigned Depth, SimplifyQuery &Q) {
2200 BinaryOperator *BO = nullptr;
2201 Value *Start = nullptr, *Step = nullptr;
2202 if (!matchSimpleRecurrence(PN, BO, Start, Step))
2203 return false;
2204
2205 // Initial value must be a power of two.
2206 for (const Use &U : PN->operands()) {
2207 if (U.get() == Start) {
2208 // Initial value comes from a different BB, need to adjust context
2209 // instruction for analysis.
2210 Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
2211 if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
2212 return false;
2213 }
2214 }
2215
2216 // Except for Mul, the induction variable must be on the left side of the
2217 // increment expression, otherwise its value can be arbitrary.
2218 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
2219 return false;
2220
2221 Q.CxtI = BO->getParent()->getTerminator();
2222 switch (BO->getOpcode()) {
2223 case Instruction::Mul:
2224 // Power of two is closed under multiplication.
2225 return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
2226 Q.IIQ.hasNoSignedWrap(BO)) &&
2227 isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
2228 case Instruction::SDiv:
2229 // Start value must not be signmask for signed division, so simply being a
2230 // power of two is not sufficient, and it has to be a constant.
2231 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2232 return false;
2233 [[fallthrough]];
2234 case Instruction::UDiv:
2235 // Divisor must be a power of two.
2236 // If OrZero is false, cannot guarantee induction variable is non-zero after
2237 // division, same for Shr, unless it is exact division.
2238 return (OrZero || Q.IIQ.isExact(BO)) &&
2239 isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
2240 case Instruction::Shl:
2241 return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
2242 case Instruction::AShr:
2243 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2244 return false;
2245 [[fallthrough]];
2246 case Instruction::LShr:
2247 return OrZero || Q.IIQ.isExact(BO);
2248 default:
2249 return false;
2250 }
2251}
2252
2253/// Return true if we can infer that \p V is known to be a power of 2 from
2254/// dominating condition \p Cond (e.g., ctpop(V) == 1).
2255static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
2256 const Value *Cond,
2257 bool CondIsTrue) {
2258 CmpPredicate Pred;
2259 const APInt *RHSC;
2260 if (!match(Cond, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Specific(V)),
2261 m_APInt(RHSC))))
2262 return false;
2263 if (!CondIsTrue)
2264 Pred = ICmpInst::getInversePredicate(Pred);
2265 // ctpop(V) u< 2
2266 if (OrZero && Pred == ICmpInst::ICMP_ULT && *RHSC == 2)
2267 return true;
2268 // ctpop(V) == 1
2269 return Pred == ICmpInst::ICMP_EQ && *RHSC == 1;
2270}
2271
2272/// Return true if the given value is known to have exactly one
2273/// bit set when defined. For vectors return true if every element is known to
2274/// be a power of two when defined. Supports values with integer or pointer
2275/// types and vectors of integers.
2276bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
2277 const SimplifyQuery &Q) {
2278 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2279
2280 if (isa<Constant>(V))
2281 return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
2282
2283 // i1 is by definition a power of 2 or zero.
2284 if (OrZero && V->getType()->getScalarSizeInBits() == 1)
2285 return true;
2286
2287 // Try to infer from assumptions.
2288 if (Q.AC && Q.CxtI) {
2289 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
2290 if (!AssumeVH)
2291 continue;
2292 CallInst *I = cast<CallInst>(AssumeVH);
2293 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, I->getArgOperand(0),
2294 /*CondIsTrue=*/true) &&
2295 isValidAssumeForContext(I, Q.CxtI, Q.DT))
2296 return true;
2297 }
2298 }
2299
2300 // Handle dominating conditions.
2301 if (Q.DC && Q.CxtI && Q.DT) {
2302 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
2303 Value *Cond = BI->getCondition();
2304
2305 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
2306 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
2307 /*CondIsTrue=*/true) &&
2308 Q.DT->dominates(Edge0, Q.CxtI->getParent()))
2309 return true;
2310
2311 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
2312 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
2313 /*CondIsTrue=*/false) &&
2314 Q.DT->dominates(Edge1, Q.CxtI->getParent()))
2315 return true;
2316 }
2317 }
2318
2319 auto *I = dyn_cast<Instruction>(V);
2320 if (!I)
2321 return false;
2322
2323 if (Q.CxtI && match(V, m_VScale())) {
2324 const Function *F = Q.CxtI->getFunction();
2325 // The vscale_range indicates vscale is a power-of-two.
2326 return F->hasFnAttribute(Attribute::VScaleRange);
2327 }
2328
2329 // 1 << X is clearly a power of two if the one is not shifted off the end. If
2330 // it is shifted off the end then the result is undefined.
2331 if (match(I, m_Shl(m_One(), m_Value())))
2332 return true;
2333
2334 // (signmask) >>l X is clearly a power of two if the one is not shifted off
2335 // the bottom. If it is shifted off the bottom then the result is undefined.
2336 if (match(I, m_LShr(m_SignMask(), m_Value())))
2337 return true;
2338
2339 // The remaining tests are all recursive, so bail out if we hit the limit.
2340 if (Depth++ == MaxAnalysisRecursionDepth)
2341 return false;
2342
2343 switch (I->getOpcode()) {
2344 case Instruction::ZExt:
2345 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2346 case Instruction::Trunc:
2347 return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2348 case Instruction::Shl:
2349 if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I))
2350 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2351 return false;
2352 case Instruction::LShr:
2353 if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I)))
2354 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2355 return false;
2356 case Instruction::UDiv:
2357 if (Q.IIQ.isExact(cast<BinaryOperator>(I)))
2358 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2359 return false;
2360 case Instruction::Mul:
2361 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2362 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) &&
2363 (OrZero || isKnownNonZero(I, Q, Depth));
2364 case Instruction::And:
2365 // A power of two and'd with anything is a power of two or zero.
2366 if (OrZero &&
2367 (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) ||
2368 isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q)))
2369 return true;
2370 // X & (-X) is always a power of two or zero.
2371 if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) ||
2372 match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0)))))
2373 return OrZero || isKnownNonZero(I->getOperand(0), Q, Depth);
2374 return false;
2375 case Instruction::Add: {
2376 // Adding a power-of-two or zero to the same power-of-two or zero yields
2377 // either the original power-of-two, a larger power-of-two or zero.
2378 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
2379 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
2380 Q.IIQ.hasNoSignedWrap(VOBO)) {
2381 if (match(I->getOperand(0),
2382 m_c_And(m_Specific(I->getOperand(1)), m_Value())) &&
2383 isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q))
2384 return true;
2385 if (match(I->getOperand(1),
2386 m_c_And(m_Specific(I->getOperand(0)), m_Value())) &&
2387 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q))
2388 return true;
2389
2390 unsigned BitWidth = V->getType()->getScalarSizeInBits();
2391 KnownBits LHSBits(BitWidth);
2392 computeKnownBits(I->getOperand(0), LHSBits, Depth, Q);
2393
2394 KnownBits RHSBits(BitWidth);
2395 computeKnownBits(I->getOperand(1), RHSBits, Depth, Q);
2396 // If i8 V is a power of two or zero:
2397 // ZeroBits: 1 1 1 0 1 1 1 1
2398 // ~ZeroBits: 0 0 0 1 0 0 0 0
2399 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
2400 // If OrZero isn't set, we cannot give back a zero result.
2401 // Make sure either the LHS or RHS has a bit set.
2402 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
2403 return true;
2404 }
2405
2406 // LShr(UINT_MAX, Y) + 1 is a power of two (if add is nuw) or zero.
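// e.g. for i32 and Y == 3, (0xFFFFFFFF >> 3) + 1 == 0x20000000 == 1 << 29.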
2407 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO))
2408 if (match(I, m_Add(m_LShr(m_AllOnes(), m_Value()), m_One())))
2409 return true;
2410 return false;
2411 }
2412 case Instruction::Select:
2413 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2414 isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q);
2415 case Instruction::PHI: {
2416 // A PHI node is a power of two if all incoming values are powers of two, or if
2417 // it is an induction variable where in each step its value is a power of
2418 // two.
2419 auto *PN = cast<PHINode>(I);
2420 SimplifyQuery RecQ = Q.getWithoutCondContext();
2421
2422 // Check if it is an induction variable and always power of two.
2423 if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
2424 return true;
2425
2426 // Recursively check all incoming values. Limit recursion to 2 levels, so
2427 // that search complexity is limited to number of operands^2.
2428 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2429 return llvm::all_of(PN->operands(), [&](const Use &U) {
2430 // Value is power of 2 if it is coming from PHI node itself by induction.
2431 if (U.get() == PN)
2432 return true;
2433
2434 // Change the context instruction to the incoming block where it is
2435 // evaluated.
2436 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2437 return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
2438 });
2439 }
2440 case Instruction::Invoke:
2441 case Instruction::Call: {
2442 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
2443 switch (II->getIntrinsicID()) {
2444 case Intrinsic::umax:
2445 case Intrinsic::smax:
2446 case Intrinsic::umin:
2447 case Intrinsic::smin:
2448 return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) &&
2449 isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2450 // bswap/bitreverse just move around bits, but don't change any 1s/0s
2451 // thus don't change pow2/non-pow2 status.
2452 case Intrinsic::bitreverse:
2453 case Intrinsic::bswap:
2454 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2455 case Intrinsic::fshr:
2456 case Intrinsic::fshl:
2457 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
2458 if (II->getArgOperand(0) == II->getArgOperand(1))
2459 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2460 break;
2461 default:
2462 break;
2463 }
2464 }
2465 return false;
2466 }
2467 default:
2468 return false;
2469 }
2470}
2471
2472/// Test whether a GEP's result is known to be non-null.
2473///
2474/// Uses properties inherent in a GEP to try to determine whether it is known
2475/// to be non-null.
2476///
2477/// Currently this routine does not support vector GEPs.
2478static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
2479 const SimplifyQuery &Q) {
2480 const Function *F = nullptr;
2481 if (const Instruction *I = dyn_cast<Instruction>(GEP))
2482 F = I->getFunction();
2483
2484 // If the GEP is nuw, or inbounds where the null pointer is invalid, then the
2485 // GEP can be null only if the base pointer is null and the offset is zero.
2486 if (!GEP->hasNoUnsignedWrap() &&
2487 !(GEP->isInBounds() &&
2488 !NullPointerIsDefined(F, GEP->getPointerAddressSpace())))
2489 return false;
2490
2491 // FIXME: Support vector-GEPs.
2492 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2493
2494 // If the base pointer is non-null, we cannot walk to a null address with an
2495 // inbounds GEP in address space zero.
2496 if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth))
2497 return true;
2498
2499 // Walk the GEP operands and see if any operand introduces a non-zero offset.
2500 // If so, then the GEP cannot produce a null pointer, as doing so would
2501 // inherently violate the inbounds contract within address space zero.
2502 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2503 GTI != GTE; ++GTI) {
2504 // Struct types are easy -- they must always be indexed by a constant.
2505 if (StructType *STy = GTI.getStructTypeOrNull()) {
2506 ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
2507 unsigned ElementIdx = OpC->getZExtValue();
2508 const StructLayout *SL = Q.DL.getStructLayout(STy);
2509 uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
2510 if (ElementOffset > 0)
2511 return true;
2512 continue;
2513 }
2514
2515 // If we have a zero-sized type, the index doesn't matter. Keep looping.
2516 if (GTI.getSequentialElementStride(Q.DL).isZero())
2517 continue;
2518
2519 // Fast path the constant operand case both for efficiency and so we don't
2520 // increment Depth when just zipping down an all-constant GEP.
2521 if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
2522 if (!OpC->isZero())
2523 return true;
2524 continue;
2525 }
2526
2527 // We post-increment Depth here because while isKnownNonZero increments it
2528 // as well, when we pop back up that increment won't persist. We don't want
2529 // to recurse 10k times just because we have 10k GEP operands. We don't
2530 // bail completely out because we want to handle constant GEPs regardless
2531 // of depth.
2532 if (Depth++ >= MaxAnalysisRecursionDepth)
2533 continue;
2534
2535 if (isKnownNonZero(GTI.getOperand(), Q, Depth))
2536 return true;
2537 }
2538
2539 return false;
2540}
2541
2542 static bool isKnownNonNullFromDominatingCondition(const Value *V,
2543 const Instruction *CtxI,
2544 const DominatorTree *DT) {
2545 assert(!isa<Constant>(V) && "Called for constant?");
2546
2547 if (!CtxI || !DT)
2548 return false;
2549
2550 unsigned NumUsesExplored = 0;
2551 for (const auto *U : V->users()) {
2552 // Avoid massive lists
2553 if (NumUsesExplored >= DomConditionsMaxUses)
2554 break;
2555 NumUsesExplored++;
2556
2557 // If the value is used as an argument to a call or invoke, then argument
2558 // attributes may provide an answer about null-ness.
2559 if (const auto *CB = dyn_cast<CallBase>(U))
2560 if (auto *CalledFunc = CB->getCalledFunction())
2561 for (const Argument &Arg : CalledFunc->args())
2562 if (CB->getArgOperand(Arg.getArgNo()) == V &&
2563 Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) &&
2564 DT->dominates(CB, CtxI))
2565 return true;
2566
2567 // If the value is used as a load/store, then the pointer must be non-null.
2568 if (V == getLoadStorePointerOperand(U)) {
2569 const Instruction *I = cast<Instruction>(U);
2570 if (!NullPointerIsDefined(I->getFunction(),
2571 V->getType()->getPointerAddressSpace()) &&
2572 DT->dominates(I, CtxI))
2573 return true;
2574 }
2575
2576 if ((match(U, m_IDiv(m_Value(), m_Specific(V))) ||
2577 match(U, m_IRem(m_Value(), m_Specific(V)))) &&
2578 isValidAssumeForContext(cast<Instruction>(U), CtxI, DT))
2579 return true;
2580
2581 // Consider only compare instructions uniquely controlling a branch
2582 Value *RHS;
2583 CmpPredicate Pred;
2584 if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
2585 continue;
2586
2587 bool NonNullIfTrue;
2588 if (cmpExcludesZero(Pred, RHS))
2589 NonNullIfTrue = true;
2590 else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
2591 NonNullIfTrue = false;
2592 else
2593 continue;
2594
2595 SmallVector<const User *, 4> WorkList;
2596 SmallPtrSet<const User *, 4> Visited;
2597 for (const auto *CmpU : U->users()) {
2598 assert(WorkList.empty() && "Should be!");
2599 if (Visited.insert(CmpU).second)
2600 WorkList.push_back(CmpU);
2601
2602 while (!WorkList.empty()) {
2603 auto *Curr = WorkList.pop_back_val();
2604
2605 // If a user is an AND, add all its users to the work list. We only
2606 // propagate "pred != null" condition through AND because it is only
2607 // correct to assume that all conditions of AND are met in true branch.
2608 // TODO: Support similar logic of OR and EQ predicate?
2609 if (NonNullIfTrue)
2610 if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
2611 for (const auto *CurrU : Curr->users())
2612 if (Visited.insert(CurrU).second)
2613 WorkList.push_back(CurrU);
2614 continue;
2615 }
2616
2617 if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
2618 assert(BI->isConditional() && "uses a comparison!");
2619
2620 BasicBlock *NonNullSuccessor =
2621 BI->getSuccessor(NonNullIfTrue ? 0 : 1);
2622 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
2623 if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
2624 return true;
2625 } else if (NonNullIfTrue && isGuard(Curr) &&
2626 DT->dominates(cast<Instruction>(Curr), CtxI)) {
2627 return true;
2628 }
2629 }
2630 }
2631 }
2632
2633 return false;
2634}
2635
2636/// Does the 'Range' metadata (which must be a valid MD_range operand list)
2637 /// ensure that the value it's attached to is never Value? 'RangeType'
2638 /// is the type of the value described by the range.
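/// For example, !range metadata describing the pairs [1, 10) and [20, 30)
/// excludes 0, whereas a range starting at 0 such as [0, 10) does not.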
2639static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2640 const unsigned NumRanges = Ranges->getNumOperands() / 2;
2641 assert(NumRanges >= 1);
2642 for (unsigned i = 0; i < NumRanges; ++i) {
2643 ConstantInt *Lower =
2644 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
2645 ConstantInt *Upper =
2646 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
2647 ConstantRange Range(Lower->getValue(), Upper->getValue());
2648 if (Range.contains(Value))
2649 return false;
2650 }
2651 return true;
2652}
2653
2654/// Try to detect a recurrence that monotonically increases/decreases from a
2655/// non-zero starting value. These are common as induction variables.
2656static bool isNonZeroRecurrence(const PHINode *PN) {
2657 BinaryOperator *BO = nullptr;
2658 Value *Start = nullptr, *Step = nullptr;
2659 const APInt *StartC, *StepC;
2660 if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
2661 !match(Start, m_APInt(StartC)) || StartC->isZero())
2662 return false;
2663
2664 switch (BO->getOpcode()) {
2665 case Instruction::Add:
2666 // Starting from non-zero and stepping away from zero can never wrap back
2667 // to zero.
2668 return BO->hasNoUnsignedWrap() ||
2669 (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
2670 StartC->isNegative() == StepC->isNegative());
2671 case Instruction::Mul:
2672 return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2673 match(Step, m_APInt(StepC)) && !StepC->isZero();
2674 case Instruction::Shl:
2675 return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2676 case Instruction::AShr:
2677 case Instruction::LShr:
2678 return BO->isExact();
2679 default:
2680 return false;
2681 }
2682}
2683
2684static bool matchOpWithOpEqZero(Value *Op0, Value *Op1) {
2685 return match(Op0, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
2686 m_Specific(Op1), m_Zero()))) ||
2687 match(Op1, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
2688 m_Specific(Op0), m_Zero())));
2689}
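// This matches patterns such as "X + zext(X == 0)": when X is zero the
// zext/sext contributes a non-zero constant, and when X is non-zero the other
// operand is zero and X itself is kept, so the callers below can conclude the
// combined expression is never zero.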
2690
2691static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
2692 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2693 Value *Y, bool NSW, bool NUW) {
2694 // (X + (X == 0)) is non zero
2695 if (matchOpWithOpEqZero(X, Y))
2696 return true;
2697
2698 if (NUW)
2699 return isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2700 isKnownNonZero(X, DemandedElts, Q, Depth);
2701
2702 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2703 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2704
2705 // If X and Y are both non-negative (as signed values) then their sum is not
2706 // zero unless both X and Y are zero.
2707 if (XKnown.isNonNegative() && YKnown.isNonNegative())
2708 if (isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2709 isKnownNonZero(X, DemandedElts, Q, Depth))
2710 return true;
2711
2712 // If X and Y are both negative (as signed values) then their sum is not
2713 // zero unless both X and Y equal INT_MIN.
2714 if (XKnown.isNegative() && YKnown.isNegative()) {
2715 APInt Mask = APInt::getSignedMaxValue(BitWidth);
2716 // The sign bit of X is set. If some other bit is set then X is not equal
2717 // to INT_MIN.
2718 if (XKnown.One.intersects(Mask))
2719 return true;
2720 // The sign bit of Y is set. If some other bit is set then Y is not equal
2721 // to INT_MIN.
2722 if (YKnown.One.intersects(Mask))
2723 return true;
2724 }
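// e.g. for i8, if X is known negative and also has bit 5 set then X != -128;
// two negative values can only sum to zero when both are INT_MIN, so X + Y
// cannot be zero.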
2725
2726 // The sum of a non-negative number and a power of two is not zero.
2727 if (XKnown.isNonNegative() &&
2728 isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
2729 return true;
2730 if (YKnown.isNonNegative() &&
2731 isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
2732 return true;
2733
2734 return KnownBits::add(XKnown, YKnown, NSW, NUW).isNonZero();
2735}
2736
2737static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
2738 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2739 Value *Y) {
2740 // (X - (X == 0)) is non zero
2741 // ((X == 0) - X) is non zero
2742 if (matchOpWithOpEqZero(X, Y))
2743 return true;
2744
2745 // TODO: Move this case into isKnownNonEqual().
2746 if (auto *C = dyn_cast<Constant>(X))
2747 if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Q, Depth))
2748 return true;
2749
2750 return ::isKnownNonEqual(X, Y, DemandedElts, Depth, Q);
2751}
2752
2753static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
2754 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2755 Value *Y, bool NSW, bool NUW) {
2756 // If X and Y are non-zero then so is X * Y as long as the multiplication
2757 // does not overflow.
2758 if (NSW || NUW)
2759 return isKnownNonZero(X, DemandedElts, Q, Depth) &&
2760 isKnownNonZero(Y, DemandedElts, Q, Depth);
2761
2762 // If either X or Y is odd, then if the other is non-zero the result can't
2763 // be zero.
2764 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2765 if (XKnown.One[0])
2766 return isKnownNonZero(Y, DemandedElts, Q, Depth);
2767
2768 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2769 if (YKnown.One[0])
2770 return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth);
2771
2772 // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
2773 // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
2774 // the lowest known One of X and Y. If they are non-zero, the result
2775 // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing
2776 // X.CountMaxTrailingZeros + Y.CountMaxTrailingZeros < BitWidth.
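// e.g. for i16, if X has a known one at bit 3 and Y has a known one at bit 5,
// the lowest set bit of X * Y is at position at most 3 + 5 == 8 < 16, so the
// product cannot be zero.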
2777 return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
2778 BitWidth;
2779}
2780
2781static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
2782 unsigned Depth, const SimplifyQuery &Q,
2783 const KnownBits &KnownVal) {
2784 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2785 switch (I->getOpcode()) {
2786 case Instruction::Shl:
2787 return Lhs.shl(Rhs);
2788 case Instruction::LShr:
2789 return Lhs.lshr(Rhs);
2790 case Instruction::AShr:
2791 return Lhs.ashr(Rhs);
2792 default:
2793 llvm_unreachable("Unknown Shift Opcode");
2794 }
2795 };
2796
2797 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2798 switch (I->getOpcode()) {
2799 case Instruction::Shl:
2800 return Lhs.lshr(Rhs);
2801 case Instruction::LShr:
2802 case Instruction::AShr:
2803 return Lhs.shl(Rhs);
2804 default:
2805 llvm_unreachable("Unknown Shift Opcode");
2806 }
2807 };
2808
2809 if (KnownVal.isUnknown())
2810 return false;
2811
2812 KnownBits KnownCnt =
2813 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2814 APInt MaxShift = KnownCnt.getMaxValue();
2815 unsigned NumBits = KnownVal.getBitWidth();
2816 if (MaxShift.uge(NumBits))
2817 return false;
2818
2819 if (!ShiftOp(KnownVal.One, MaxShift).isZero())
2820 return true;
2821
2822 // If all of the bits shifted out are known to be zero, and Val is known
2823 // non-zero then at least one non-zero bit must remain.
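// e.g. for shl: if X is known non-zero and its top MaxShift bits are all
// known zero, no set bit can be shifted out for any feasible shift amount, so
// at least one set bit survives and the result is non-zero.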
2824 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
2825 .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
2826 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth))
2827 return true;
2828
2829 return false;
2830}
2831
2832 static bool isKnownNonZeroFromOperator(const Operator *I,
2833 const APInt &DemandedElts,
2834 unsigned Depth, const SimplifyQuery &Q) {
2835 unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
2836 switch (I->getOpcode()) {
2837 case Instruction::Alloca:
2838 // Alloca never returns null, malloc might.
2839 return I->getType()->getPointerAddressSpace() == 0;
2840 case Instruction::GetElementPtr:
2841 if (I->getType()->isPointerTy())
2842 return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q);
2843 break;
2844 case Instruction::BitCast: {
2845 // We need to be a bit careful here. We can only peek through the bitcast
2846 // if the scalar size of the elements in the operand is smaller than, and
2847 // divides, the size they are casting to. Take three cases:
2848 //
2849 // 1) Unsafe:
2850 // bitcast <2 x i16> %NonZero to <4 x i8>
2851 //
2852 // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
2853 // <4 x i8> requires that all 4 i8 elements be non-zero which isn't
2854 // guaranteed (imagine just the sign bit set in the 2 i16 elements).
2855 //
2856 // 2) Unsafe:
2857 // bitcast <4 x i3> %NonZero to <3 x i4>
2858 //
2859 // Even though the scalar size of the src (`i3`) is smaller than the
2860 // scalar size of the dst `i4`, because `i4` is not a multiple of `i3`,
2861 // it's possible for the `3 x i4` elements to be zero because there are
2862 // some elements in the destination that don't contain any full src
2863 // element.
2864 //
2865 // 3) Safe:
2866 // bitcast <4 x i8> %NonZero to <2 x i16>
2867 //
2868 // This is always safe as non-zero in the 4 i8 elements implies
2869 // non-zero in the combination of any two adjacent ones. Since i16 is a
2870 // multiple of i8, each i16 is guaranteed to contain 2 full i8 elements.
2871 // This all implies the 2 i16 elements are non-zero.
2872 Type *FromTy = I->getOperand(0)->getType();
2873 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
2874 (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0)
2875 return isKnownNonZero(I->getOperand(0), Q, Depth);
2876 } break;
2877 case Instruction::IntToPtr:
2878 // Note that we have to take special care to avoid looking through
2879 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
2880 // as casts that can alter the value, e.g., AddrSpaceCasts.
2881 if (!isa<ScalableVectorType>(I->getType()) &&
2882 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2883 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2884 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2885 break;
2886 case Instruction::PtrToInt:
2887 // Similar to int2ptr above, we can look through ptr2int here if the cast
2888 // is a no-op or an extend and not a truncate.
2889 if (!isa<ScalableVectorType>(I->getType()) &&
2890 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2891 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2892 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2893 break;
2894 case Instruction::Trunc:
2895 // nuw/nsw trunc preserves zero/non-zero status of input.
2896 if (auto *TI = dyn_cast<TruncInst>(I))
2897 if (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap())
2898 return isKnownNonZero(TI->getOperand(0), DemandedElts, Q, Depth);
2899 break;
2900
2901 case Instruction::Sub:
2902 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2903 I->getOperand(1));
2904 case Instruction::Xor:
2905 // (X ^ (X == 0)) is non zero
2906 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2907 return true;
2908 break;
2909 case Instruction::Or:
2910 // (X | (X == 0)) is non zero
2911 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2912 return true;
2913 // X | Y != 0 if X != 0 or Y != 0.
2914 return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) ||
2915 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2916 case Instruction::SExt:
2917 case Instruction::ZExt:
2918 // ext X != 0 if X != 0.
2919 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2920
2921 case Instruction::Shl: {
2922 // shl nsw/nuw can't remove any non-zero bits.
2923 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
2924 if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO))
2925 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2926
2927 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
2928 // if the lowest bit is shifted off the end.
2929 KnownBits Known(BitWidth);
2930 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q);
2931 if (Known.One[0])
2932 return true;
2933
2934 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
2935 }
2936 case Instruction::LShr:
2937 case Instruction::AShr: {
2938 // shr exact can only shift out zero bits.
2939 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
2940 if (BO->isExact())
2941 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2942
2943 // shr X, Y != 0 if X is negative. Note that the value of the shift is not
2944 // defined if the sign bit is shifted off the end.
2945 KnownBits Known =
2946 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2947 if (Known.isNegative())
2948 return true;
2949
2950 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
2951 }
2952 case Instruction::UDiv:
2953 case Instruction::SDiv: {
2954 // X / Y
2955 // div exact can only produce a zero if the dividend is zero.
2956 if (cast<PossiblyExactOperator>(I)->isExact())
2957 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2958
2959 KnownBits XKnown =
2960 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2961 // If X is fully unknown we won't be able to figure anything out so don't
2962 // bother computing knownbits for Y.
2963 if (XKnown.isUnknown())
2964 return false;
2965
2966 KnownBits YKnown =
2967 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2968 if (I->getOpcode() == Instruction::SDiv) {
2969 // For signed division need to compare abs value of the operands.
2970 XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
2971 YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
2972 }
2973 // If X u>= Y then div is non zero (0/0 is UB).
2974 std::optional<bool> XUgeY = KnownBits::uge(XKnown, YKnown);
2975 // If X is totally unknown or X u< Y we won't be able to prove non-zero
2976 // with computeKnownBits so just return early.
2977 return XUgeY && *XUgeY;
2978 }
2979 case Instruction::Add: {
2980 // X + Y.
2981
2982 // If Add has the nuw flag, then if either X or Y is non-zero the result is
2983 // non-zero.
2984 auto *BO = cast<OverflowingBinaryOperator>(I);
2985 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2986 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
2987 Q.IIQ.hasNoUnsignedWrap(BO));
2988 }
2989 case Instruction::Mul: {
2990 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
2991 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2992 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
2993 Q.IIQ.hasNoUnsignedWrap(BO));
2994 }
2995 case Instruction::Select: {
2996 // (C ? X : Y) != 0 if X != 0 and Y != 0.
2997
2998 // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
2999 // then see if the select condition implies the arm is non-zero. For example
3000 // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is
3001 // dominated by `X != 0`.
3002 auto SelectArmIsNonZero = [&](bool IsTrueArm) {
3003 Value *Op;
3004 Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2);
3005 // Op is trivially non-zero.
3006 if (isKnownNonZero(Op, DemandedElts, Q, Depth))
3007 return true;
3008
3009 // The condition of the select dominates the true/false arm. Check if the
3010 // condition implies that a given arm is non-zero.
3011 Value *X;
3012 CmpPredicate Pred;
3013 if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X))))
3014 return false;
3015
3016 if (!IsTrueArm)
3017 Pred = ICmpInst::getInversePredicate(Pred);
3018
3019 return cmpExcludesZero(Pred, X);
3020 };
3021
3022 if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
3023 SelectArmIsNonZero(/* IsTrueArm */ false))
3024 return true;
3025 break;
3026 }
3027 case Instruction::PHI: {
3028 auto *PN = cast<PHINode>(I);
3029 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
3030 return true;
3031
3032 // Check if all incoming values are non-zero using recursion.
3033 SimplifyQuery RecQ = Q.getWithoutCondContext();
3034 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
3035 return llvm::all_of(PN->operands(), [&](const Use &U) {
3036 if (U.get() == PN)
3037 return true;
3038 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
3039 // Check if the branch on the phi excludes zero.
3040 CmpPredicate Pred;
3041 Value *X;
3042 BasicBlock *TrueSucc, *FalseSucc;
3043 if (match(RecQ.CxtI,
3044 m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)),
3045 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
3046 // Check for cases of duplicate successors.
3047 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
3048 // If we're using the false successor, invert the predicate.
3049 if (FalseSucc == PN->getParent())
3050 Pred = CmpInst::getInversePredicate(Pred);
3051 if (cmpExcludesZero(Pred, X))
3052 return true;
3053 }
3054 }
3055 // Finally recurse on the edge and check it directly.
3056 return isKnownNonZero(U.get(), DemandedElts, RecQ, NewDepth);
3057 });
3058 }
3059 case Instruction::InsertElement: {
3060 if (isa<ScalableVectorType>(I->getType()))
3061 break;
3062
3063 const Value *Vec = I->getOperand(0);
3064 const Value *Elt = I->getOperand(1);
3065 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
3066
3067 unsigned NumElts = DemandedElts.getBitWidth();
3068 APInt DemandedVecElts = DemandedElts;
3069 bool SkipElt = false;
3070 // If we know the index we are inserting to, clear it from the Vec check.
3071 if (CIdx && CIdx->getValue().ult(NumElts)) {
3072 DemandedVecElts.clearBit(CIdx->getZExtValue());
3073 SkipElt = !DemandedElts[CIdx->getZExtValue()];
3074 }
3075
3076 // Result is non-zero if Elt is non-zero and the rest of the demanded elts
3077 // in Vec are non-zero.
3078 return (SkipElt || isKnownNonZero(Elt, Q, Depth)) &&
3079 (DemandedVecElts.isZero() ||
3080 isKnownNonZero(Vec, DemandedVecElts, Q, Depth));
3081 }
3082 case Instruction::ExtractElement:
3083 if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
3084 const Value *Vec = EEI->getVectorOperand();
3085 const Value *Idx = EEI->getIndexOperand();
3086 auto *CIdx = dyn_cast<ConstantInt>(Idx);
3087 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
3088 unsigned NumElts = VecTy->getNumElements();
3089 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
3090 if (CIdx && CIdx->getValue().ult(NumElts))
3091 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
3092 return isKnownNonZero(Vec, DemandedVecElts, Q, Depth);
3093 }
3094 }
3095 break;
3096 case Instruction::ShuffleVector: {
3097 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
3098 if (!Shuf)
3099 break;
3100 APInt DemandedLHS, DemandedRHS;
3101 // For undef elements, we don't know anything about the common state of
3102 // the shuffle result.
3103 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
3104 break;
3105 // If demanded elements for both vecs are non-zero, the shuffle is non-zero.
3106 return (DemandedRHS.isZero() ||
3107 isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)) &&
3108 (DemandedLHS.isZero() ||
3109 isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth));
3110 }
3111 case Instruction::Freeze:
3112 return isKnownNonZero(I->getOperand(0), Q, Depth) &&
3113 isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
3114 Depth);
3115 case Instruction::Load: {
3116 auto *LI = cast<LoadInst>(I);
3117 // A Load tagged with nonnull or dereferenceable with null pointer undefined
3118 // is never null.
3119 if (auto *PtrT = dyn_cast<PointerType>(I->getType())) {
3120 if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) ||
3121 (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) &&
3122 !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace())))
3123 return true;
3124 } else if (MDNode *Ranges = Q.IIQ.getMetadata(LI, LLVMContext::MD_range)) {
3125 return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
3126 }
3127
3128 // No need to fall through to computeKnownBits as range metadata is already
3129 // handled in isKnownNonZero.
3130 return false;
3131 }
3132 case Instruction::ExtractValue: {
3133 const WithOverflowInst *WO;
3134 if (match(I, m_ExtractValue<0>(m_WithOverflowInst(WO)))) {
3135 switch (WO->getBinaryOp()) {
3136 default:
3137 break;
3138 case Instruction::Add:
3139 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
3140 WO->getArgOperand(0), WO->getArgOperand(1),
3141 /*NSW=*/false,
3142 /*NUW=*/false);
3143 case Instruction::Sub:
3144 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
3145 WO->getArgOperand(0), WO->getArgOperand(1));
3146 case Instruction::Mul:
3147 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth,
3148 WO->getArgOperand(0), WO->getArgOperand(1),
3149 /*NSW=*/false, /*NUW=*/false);
3150 break;
3151 }
3152 }
3153 break;
3154 }
3155 case Instruction::Call:
3156 case Instruction::Invoke: {
3157 const auto *Call = cast<CallBase>(I);
3158 if (I->getType()->isPointerTy()) {
3159 if (Call->isReturnNonNull())
3160 return true;
3161 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
3162 return isKnownNonZero(RP, Q, Depth);
3163 } else {
3164 if (MDNode *Ranges = Q.IIQ.getMetadata(Call, LLVMContext::MD_range))
3166 if (std::optional<ConstantRange> Range = Call->getRange()) {
3167 const APInt ZeroValue(Range->getBitWidth(), 0);
3168 if (!Range->contains(ZeroValue))
3169 return true;
3170 }
3171 if (const Value *RV = Call->getReturnedArgOperand())
3172 if (RV->getType() == I->getType() && isKnownNonZero(RV, Q, Depth))
3173 return true;
3174 }
3175
3176 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
3177 switch (II->getIntrinsicID()) {
3178 case Intrinsic::sshl_sat:
3179 case Intrinsic::ushl_sat:
3180 case Intrinsic::abs:
3181 case Intrinsic::bitreverse:
3182 case Intrinsic::bswap:
3183 case Intrinsic::ctpop:
3184 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3185 // NB: We don't handle usub_sat here because in any case where we can prove
3186 // it is non-zero, we will fold it to `sub nuw` in InstCombine.
3187 case Intrinsic::ssub_sat:
3188 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
3189 II->getArgOperand(0), II->getArgOperand(1));
3190 case Intrinsic::sadd_sat:
3191 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
3192 II->getArgOperand(0), II->getArgOperand(1),
3193 /*NSW=*/true, /* NUW=*/false);
3194 // Vec reverse preserves zero/non-zero status from input vec.
3195 case Intrinsic::vector_reverse:
3196 return isKnownNonZero(II->getArgOperand(0), DemandedElts.reverseBits(),
3197 Q, Depth);
3198 // or/umax/umin/smax/smin of all non-zero elements is always non-zero.
3199 case Intrinsic::vector_reduce_or:
3200 case Intrinsic::vector_reduce_umax:
3201 case Intrinsic::vector_reduce_umin:
3202 case Intrinsic::vector_reduce_smax:
3203 case Intrinsic::vector_reduce_smin:
3204 return isKnownNonZero(II->getArgOperand(0), Q, Depth);
3205 case Intrinsic::umax:
3206 case Intrinsic::uadd_sat:
3207 // umax(X, (X != 0)) is non zero
3208 // X +usat (X != 0) is non zero
3209 if (matchOpWithOpEqZero(II->getArgOperand(0), II->getArgOperand(1)))
3210 return true;
3211
3212 return isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth) ||
3213 isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3214 case Intrinsic::smax: {
3215 // If either arg is strictly positive the result is non-zero. Otherwise
3216 // the result is non-zero if both ops are non-zero.
3217 auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero,
3218 const KnownBits &OpKnown) {
3219 if (!OpNonZero.has_value())
3220 OpNonZero = OpKnown.isNonZero() ||
3221 isKnownNonZero(Op, DemandedElts, Q, Depth);
3222 return *OpNonZero;
3223 };
3224 // Avoid re-computing isKnownNonZero.
3225 std::optional<bool> Op0NonZero, Op1NonZero;
3226 KnownBits Op1Known =
3227 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3228 if (Op1Known.isNonNegative() &&
3229 IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known))
3230 return true;
3231 KnownBits Op0Known =
3232 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3233 if (Op0Known.isNonNegative() &&
3234 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known))
3235 return true;
3236 return IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known) &&
3237 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known);
3238 }
3239 case Intrinsic::smin: {
3240 // If either arg is negative the result is non-zero. Otherwise
3241 // the result is non-zero if both ops are non-zero.
3242 KnownBits Op1Known =
3243 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3244 if (Op1Known.isNegative())
3245 return true;
3246 KnownBits Op0Known =
3247 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3248 if (Op0Known.isNegative())
3249 return true;
3250
3251 if (Op1Known.isNonZero() && Op0Known.isNonZero())
3252 return true;
3253 }
3254 [[fallthrough]];
3255 case Intrinsic::umin:
3256 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth) &&
3257 isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth);
3258 case Intrinsic::cttz:
3259 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3260 .Zero[0];
3261 case Intrinsic::ctlz:
3262 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3263 .isNonNegative();
3264 case Intrinsic::fshr:
3265 case Intrinsic::fshl:
3266 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
3267 if (II->getArgOperand(0) == II->getArgOperand(1))
3268 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3269 break;
3270 case Intrinsic::vscale:
3271 return true;
3272 case Intrinsic::experimental_get_vector_length:
3273 return isKnownNonZero(I->getOperand(0), Q, Depth);
3274 default:
3275 break;
3276 }
3277 break;
3278 }
3279
3280 return false;
3281 }
3282 }
3283
3284 KnownBits Known(BitWidth);
3285 computeKnownBits(I, DemandedElts, Known, Depth, Q);
3286 return Known.One != 0;
3287}
3288
3289/// Return true if the given value is known to be non-zero when defined. For
3290/// vectors, return true if every demanded element is known to be non-zero when
3291/// defined. For pointers, if the context instruction and dominator tree are
3292/// specified, perform context-sensitive analysis and return true if the
3293/// pointer couldn't possibly be null at the specified instruction.
3294/// Supports values with integer or pointer type and vectors of integers.
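///
/// For example, `or i32 %x, 1` is known non-zero because bit 0 is always set,
/// and a `load` annotated with !nonnull metadata yields a known non-null pointer.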
3295bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
3296 const SimplifyQuery &Q, unsigned Depth) {
3297 Type *Ty = V->getType();
3298
3299#ifndef NDEBUG
3300 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3301
3302 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3303 assert(
3304 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3305 "DemandedElt width should equal the fixed vector number of elements");
3306 } else {
3307 assert(DemandedElts == APInt(1, 1) &&
3308 "DemandedElt width should be 1 for scalars");
3309 }
3310#endif
3311
3312 if (auto *C = dyn_cast<Constant>(V)) {
3313 if (C->isNullValue())
3314 return false;
3315 if (isa<ConstantInt>(C))
3316 // Must be non-zero due to null test above.
3317 return true;
3318
3319 // For constant vectors, check that all elements are poison or known
3320 // non-zero to determine that the whole vector is known non-zero.
3321 if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
3322 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
3323 if (!DemandedElts[i])
3324 continue;
3325 Constant *Elt = C->getAggregateElement(i);
3326 if (!Elt || Elt->isNullValue())
3327 return false;
3328 if (!isa<PoisonValue>(Elt) && !isa<ConstantInt>(Elt))
3329 return false;
3330 }
3331 return true;
3332 }
3333
3334 // A constant ptrauth can be null iff its base pointer can be.
3335 if (auto *CPA = dyn_cast<ConstantPtrAuth>(V))
3336 return isKnownNonZero(CPA->getPointer(), DemandedElts, Q, Depth);
3337
3338 // A global variable in address space 0 is non-null unless it is extern weak
3339 // or an absolute symbol reference. Other address spaces may have null as a
3340 // valid address for a global, so we can't assume anything.
3341 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
3342 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3343 GV->getType()->getAddressSpace() == 0)
3344 return true;
3345 }
3346
3347 // For constant expressions, fall through to the Operator code below.
3348 if (!isa<ConstantExpr>(V))
3349 return false;
3350 }
3351
3352 if (const auto *A = dyn_cast<Argument>(V))
3353 if (std::optional<ConstantRange> Range = A->getRange()) {
3354 const APInt ZeroValue(Range->getBitWidth(), 0);
3355 if (!Range->contains(ZeroValue))
3356 return true;
3357 }
3358
3359 if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q))
3360 return true;
3361
3362 // Some of the tests below are recursive, so bail out if we hit the limit.
3363 if (Depth++ >= MaxAnalysisRecursionDepth)
3364 return false;
3365
3366 // Check for pointer simplifications.
3367
3368 if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) {
3369 // A byval or inalloca argument may not be null in a non-default address
3370 // space. A nonnull argument is assumed never 0.
3371 if (const Argument *A = dyn_cast<Argument>(V)) {
3372 if (((A->hasPassPointeeByValueCopyAttr() &&
3373 !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
3374 A->hasNonNullAttr()))
3375 return true;
3376 }
3377 }
3378
3379 if (const auto *I = dyn_cast<Operator>(V))
3380 if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q))
3381 return true;
3382
3383 if (!isa<Constant>(V) &&
3384 isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
3385 return true;
3386
3387 return false;
3388}
3389
3390bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
3391 unsigned Depth) {
3392 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
3393 APInt DemandedElts =
3394 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
3395 return ::isKnownNonZero(V, DemandedElts, Q, Depth);
3396}
3397
3398/// If the pair of operators are the same invertible function, return the
3399/// operands of the function corresponding to each input. Otherwise,
3400/// return std::nullopt. An invertible function is one that is 1-to-1 and maps
3401/// every input value to exactly one output value. This is equivalent to
3402/// saying that Op1 and Op2 are equal exactly when the specified pair of
3403/// operands are equal (except that Op1 and Op2 may be poison more often).
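///
/// For example, `add i32 %x, %c` and `add i32 %y, %c` are equal exactly when
/// %x == %y, so for that pair this returns the operand pair (%x, %y).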
3404static std::optional<std::pair<Value*, Value*>>
3405getInvertibleOperands(const Operator *Op1,
3406 const Operator *Op2) {
3407 if (Op1->getOpcode() != Op2->getOpcode())
3408 return std::nullopt;
3409
3410 auto getOperands = [&](unsigned OpNum) -> auto {
3411 return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
3412 };
3413
3414 switch (Op1->getOpcode()) {
3415 default:
3416 break;
3417 case Instruction::Or:
3418 if (!cast<PossiblyDisjointInst>(Op1)->isDisjoint() ||
3419 !cast<PossiblyDisjointInst>(Op2)->isDisjoint())
3420 break;
3421 [[fallthrough]];
3422 case Instruction::Xor:
3423 case Instruction::Add: {
3424 Value *Other;
3425 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other))))
3426 return std::make_pair(Op1->getOperand(1), Other);
3427 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other))))
3428 return std::make_pair(Op1->getOperand(0), Other);
3429 break;
3430 }
3431 case Instruction::Sub:
3432 if (Op1->getOperand(0) == Op2->getOperand(0))
3433 return getOperands(1);
3434 if (Op1->getOperand(1) == Op2->getOperand(1))
3435 return getOperands(0);
3436 break;
3437 case Instruction::Mul: {
3438 // Invertible if A * B == (A * B) mod 2^N, where A and B are integers
3439 // and N is the bitwidth. The nsw case is non-obvious, but proven by
3440 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
3441 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3442 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3443 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3444 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3445 break;
3446
3447 // Assume operand order has been canonicalized
3448 if (Op1->getOperand(1) == Op2->getOperand(1) &&
3449 isa<ConstantInt>(Op1->getOperand(1)) &&
3450 !cast<ConstantInt>(Op1->getOperand(1))->isZero())
3451 return getOperands(0);
3452 break;
3453 }
3454 case Instruction::Shl: {
3455 // Same as multiplies, with the difference that we don't need to check
3456 // for a non-zero multiply. Shifts always multiply by non-zero.
3457 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3458 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3459 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3460 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3461 break;
3462
3463 if (Op1->getOperand(1) == Op2->getOperand(1))
3464 return getOperands(0);
3465 break;
3466 }
3467 case Instruction::AShr:
3468 case Instruction::LShr: {
3469 auto *PEO1 = cast<PossiblyExactOperator>(Op1);
3470 auto *PEO2 = cast<PossiblyExactOperator>(Op2);
3471 if (!PEO1->isExact() || !PEO2->isExact())
3472 break;
3473
3474 if (Op1->getOperand(1) == Op2->getOperand(1))
3475 return getOperands(0);
3476 break;
3477 }
3478 case Instruction::SExt:
3479 case Instruction::ZExt:
3480 if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
3481 return getOperands(0);
3482 break;
3483 case Instruction::PHI: {
3484 const PHINode *PN1 = cast<PHINode>(Op1);
3485 const PHINode *PN2 = cast<PHINode>(Op2);
3486
3487 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
3488 // are a single invertible function of the start values? Note that repeated
3489 // application of an invertible function is also invertible
3490 BinaryOperator *BO1 = nullptr;
3491 Value *Start1 = nullptr, *Step1 = nullptr;
3492 BinaryOperator *BO2 = nullptr;
3493 Value *Start2 = nullptr, *Step2 = nullptr;
3494 if (PN1->getParent() != PN2->getParent() ||
3495 !matchSimpleRecurrence(PN1, BO1, Start1, Step1) ||
3496 !matchSimpleRecurrence(PN2, BO2, Start2, Step2))
3497 break;
3498
3499 auto Values = getInvertibleOperands(cast<Operator>(BO1),
3500 cast<Operator>(BO2));
3501 if (!Values)
3502 break;
3503
3504 // We have to be careful of mutually defined recurrences here. Ex:
3505 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
3506 // * X_i = Y_i = X_(i-1) OP Y_(i-1)
3507 // The invertibility of these is complicated, and not worth reasoning
3508 // about (yet?).
3509 if (Values->first != PN1 || Values->second != PN2)
3510 break;
3511
3512 return std::make_pair(Start1, Start2);
3513 }
3514 }
3515 return std::nullopt;
3516}
3517
3518/// Return true if V1 == (binop V2, X), where X is known non-zero.
3519/// Only handle a small subset of binops where (binop V2, X) with non-zero X
3520/// implies V2 != V1.
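///
/// For example, if `%v1 = add i32 %v2, %x` and %x is known non-zero, then
/// %v1 != %v2, since adding a non-zero value modulo 2^N has no fixed point.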
3521static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
3522 const APInt &DemandedElts, unsigned Depth,
3523 const SimplifyQuery &Q) {
3524 const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
3525 if (!BO)
3526 return false;
3527 switch (BO->getOpcode()) {
3528 default:
3529 break;
3530 case Instruction::Or:
3531 if (!cast<PossiblyDisjointInst>(V1)->isDisjoint())
3532 break;
3533 [[fallthrough]];
3534 case Instruction::Xor:
3535 case Instruction::Add:
3536 Value *Op = nullptr;
3537 if (V2 == BO->getOperand(0))
3538 Op = BO->getOperand(1);
3539 else if (V2 == BO->getOperand(1))
3540 Op = BO->getOperand(0);
3541 else
3542 return false;
3543 return isKnownNonZero(Op, DemandedElts, Q, Depth + 1);
3544 }
3545 return false;
3546}
3547
3548/// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
3549/// the multiplication is nuw or nsw.
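///
/// For example, if `%v2 = mul nuw i32 %v1, 3` and %v1 is known non-zero, then
/// %v2 != %v1.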
3550static bool isNonEqualMul(const Value *V1, const Value *V2,
3551 const APInt &DemandedElts, unsigned Depth,
3552 const SimplifyQuery &Q) {
3553 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3554 const APInt *C;
3555 return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
3556 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3557 !C->isZero() && !C->isOne() &&
3558 isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
3559 }
3560 return false;
3561}
3562
3563/// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
3564/// the shift is nuw or nsw.
3565static bool isNonEqualShl(const Value *V1, const Value *V2,
3566 const APInt &DemandedElts, unsigned Depth,
3567 const SimplifyQuery &Q) {
3568 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3569 const APInt *C;
3570 return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
3571 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3572 !C->isZero() && isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
3573 }
3574 return false;
3575}
3576
3577static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
3578 const APInt &DemandedElts, unsigned Depth,
3579 const SimplifyQuery &Q) {
3580 // Check that the two PHIs are in the same block.
3581 if (PN1->getParent() != PN2->getParent())
3582 return false;
3583
3584 SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
3585 bool UsedFullRecursion = false;
3586 for (const BasicBlock *IncomBB : PN1->blocks()) {
3587 if (!VisitedBBs.insert(IncomBB).second)
3588 continue; // Don't reprocess blocks that we have dealt with already.
3589 const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
3590 const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
3591 const APInt *C1, *C2;
3592 if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2)
3593 continue;
3594
3595 // Only one pair of phi operands is allowed for full recursion.
3596 if (UsedFullRecursion)
3597 return false;
3598
3599 SimplifyQuery RecQ = Q.getWithoutCondContext();
3600 RecQ.CxtI = IncomBB->getTerminator();
3601 if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ))
3602 return false;
3603 UsedFullRecursion = true;
3604 }
3605 return true;
3606}
3607
3608static bool isNonEqualSelect(const Value *V1, const Value *V2,
3609 const APInt &DemandedElts, unsigned Depth,
3610 const SimplifyQuery &Q) {
3611 const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
3612 if (!SI1)
3613 return false;
3614
3615 if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
3616 const Value *Cond1 = SI1->getCondition();
3617 const Value *Cond2 = SI2->getCondition();
3618 if (Cond1 == Cond2)
3619 return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
3620 DemandedElts, Depth + 1, Q) &&
3621 isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
3622 DemandedElts, Depth + 1, Q);
3623 }
3624 return isKnownNonEqual(SI1->getTrueValue(), V2, DemandedElts, Depth + 1, Q) &&
3625 isKnownNonEqual(SI1->getFalseValue(), V2, DemandedElts, Depth + 1, Q);
3626}
3627
3628// Check whether A is a GEP whose pointer operand is a PHI in a loop and B is
3629// either a pointer or another GEP. If the PHI has two incoming values, one of
3630// them being the recursive GEP A and the other a pointer with the same base as
3631// B at the same or a higher offset, then A != B whenever the recursive GEP's
3632// constant offset is strictly positive (and symmetrically for negative steps).
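//
// A sketch of the pattern in IR (illustrative only):
//   loop:
//     %p = phi ptr [ %start, %entry ], [ %a, %loop ]
//     %a = getelementptr inbounds i8, ptr %p, i64 4
// If %start shares a base with B and already sits at the same or a higher
// offset, %a keeps moving further away from B every iteration, so %a != B.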
3633static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
3634 const SimplifyQuery &Q) {
3635 if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
3636 return false;
3637
3638 auto *GEPA = dyn_cast<GEPOperator>(A);
3639 if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin()))
3640 return false;
3641
3642 // Handle 2 incoming PHI values with one being a recursive GEP.
3643 auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
3644 if (!PN || PN->getNumIncomingValues() != 2)
3645 return false;
3646
3647 // Search for the recursive GEP as an incoming operand, and record that as
3648 // Step.
3649 Value *Start = nullptr;
3650 Value *Step = const_cast<Value *>(A);
3651 if (PN->getIncomingValue(0) == Step)
3652 Start = PN->getIncomingValue(1);
3653 else if (PN->getIncomingValue(1) == Step)
3654 Start = PN->getIncomingValue(0);
3655 else
3656 return false;
3657
3658 // Other incoming node base should match the B base.
3659 // StartOffset >= OffsetB && StepOffset > 0?
3660 // StartOffset <= OffsetB && StepOffset < 0?
3661 // Is non-equal if above are true.
3662 // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
3663 // optimisation to inbounds GEPs only.
3664 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
3665 APInt StartOffset(IndexWidth, 0);
3666 Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
3667 APInt StepOffset(IndexWidth, 0);
3668 Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);
3669
3670 // Check if Base Pointer of Step matches the PHI.
3671 if (Step != PN)
3672 return false;
3673 APInt OffsetB(IndexWidth, 0);
3674 B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
3675 return Start == B &&
3676 ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) ||
3677 (StartOffset.sle(OffsetB) && StepOffset.isNegative()));
3678}
3679
3680/// Return true if it is known that V1 != V2.
3681static bool isKnownNonEqual(const Value *V1, const Value *V2,
3682 const APInt &DemandedElts, unsigned Depth,
3683 const SimplifyQuery &Q) {
3684 if (V1 == V2)
3685 return false;
3686 if (V1->getType() != V2->getType())
3687 // We can't look through casts yet.
3688 return false;
3689
3690 if (Depth >= MaxAnalysisRecursionDepth)
3691 return false;
3692
3693 // See if we can recurse through (exactly one of) our operands. This
3694 // requires our operation be 1-to-1 and map every input value to exactly
3695 // one output value. Such an operation is invertible.
3696 auto *O1 = dyn_cast<Operator>(V1);
3697 auto *O2 = dyn_cast<Operator>(V2);
3698 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
3699 if (auto Values = getInvertibleOperands(O1, O2))
3700 return isKnownNonEqual(Values->first, Values->second, DemandedElts,
3701 Depth + 1, Q);
3702
3703 if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
3704 const PHINode *PN2 = cast<PHINode>(V2);
3705 // FIXME: This is missing a generalization to handle the case where one is
3706 // a PHI and another one isn't.
3707 if (isNonEqualPHIs(PN1, PN2, DemandedElts, Depth, Q))
3708 return true;
3709 };
3710 }
3711
3712 if (isModifyingBinopOfNonZero(V1, V2, DemandedElts, Depth, Q) ||
3713 isModifyingBinopOfNonZero(V2, V1, DemandedElts, Depth, Q))
3714 return true;
3715
3716 if (isNonEqualMul(V1, V2, DemandedElts, Depth, Q) ||
3717 isNonEqualMul(V2, V1, DemandedElts, Depth, Q))
3718 return true;
3719
3720 if (isNonEqualShl(V1, V2, DemandedElts, Depth, Q) ||
3721 isNonEqualShl(V2, V1, DemandedElts, Depth, Q))
3722 return true;
3723
3724 if (V1->getType()->isIntOrIntVectorTy()) {
3725 // Are any known bits in V1 contradictory to known bits in V2? If V1
3726 // has a known zero where V2 has a known one, they must not be equal.
3727 KnownBits Known1 = computeKnownBits(V1, DemandedElts, Depth, Q);
3728 if (!Known1.isUnknown()) {
3729 KnownBits Known2 = computeKnownBits(V2, DemandedElts, Depth, Q);
3730 if (Known1.Zero.intersects(Known2.One) ||
3731 Known2.Zero.intersects(Known1.One))
3732 return true;
3733 }
3734 }
3735
3736 if (isNonEqualSelect(V1, V2, DemandedElts, Depth, Q) ||
3737 isNonEqualSelect(V2, V1, DemandedElts, Depth, Q))
3738 return true;
3739
3740 if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) ||
3741 isNonEqualPointersWithRecursiveGEP(V2, V1, Q))
3742 return true;
3743
3744 Value *A, *B;
3745 // PtrToInts are NonEqual if their Ptrs are NonEqual.
3746 // Check PtrToInt type matches the pointer size.
3747 if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) &&
3748 match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B))))
3749 return isKnownNonEqual(A, B, DemandedElts, Depth + 1, Q);
3750
3751 return false;
3752}
3753
3754// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
3755// Returns the input and lower/upper bounds.
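// For example, `smax(smin(%x, 100), -50)` clamps %x to [-50, 100], so this
// returns In = %x, CLow = -50, CHigh = 100.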
3756static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
3757 const APInt *&CLow, const APInt *&CHigh) {
3758 assert(isa<Operator>(Select) &&
3759 cast<Operator>(Select)->getOpcode() == Instruction::Select &&
3760 "Input should be a Select!");
3761
3762 const Value *LHS = nullptr, *RHS = nullptr;
3763 SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
3764 if (SPF != SPF_SMAX && SPF != SPF_SMIN)
3765 return false;
3766
3767 if (!match(RHS, m_APInt(CLow)))
3768 return false;
3769
3770 const Value *LHS2 = nullptr, *RHS2 = nullptr;
3771 SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
3772 if (getInverseMinMaxFlavor(SPF) != SPF2)
3773 return false;
3774
3775 if (!match(RHS2, m_APInt(CHigh)))
3776 return false;
3777
3778 if (SPF == SPF_SMIN)
3779 std::swap(CLow, CHigh);
3780
3781 In = LHS2;
3782 return CLow->sle(*CHigh);
3783}
3784
3785static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
3786 const APInt *&CLow,
3787 const APInt *&CHigh) {
3788 assert((II->getIntrinsicID() == Intrinsic::smin ||
3789 II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax");
3790
3791 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
3792 auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
3793 if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
3794 !match(II->getArgOperand(1), m_APInt(CLow)) ||
3795 !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
3796 return false;
3797
3798 if (II->getIntrinsicID() == Intrinsic::smin)
3799 std::swap(CLow, CHigh);
3800 return CLow->sle(*CHigh);
3801}
3802
3803/// For vector constants, loop over the elements and find the constant with the
3804/// minimum number of sign bits. Return 0 if the value is not a vector constant
3805/// or if any element was not analyzed; otherwise, return the count for the
3806/// element with the minimum number of sign bits.
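///
/// For example, for `<2 x i8> <i8 -1, i8 1>` the elements have 8 and 7 sign
/// bits respectively, so this returns 7.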
3807static unsigned computeNumSignBitsVectorConstant(const Value *V,
3808 const APInt &DemandedElts,
3809 unsigned TyBits) {
3810 const auto *CV = dyn_cast<Constant>(V);
3811 if (!CV || !isa<FixedVectorType>(CV->getType()))
3812 return 0;
3813
3814 unsigned MinSignBits = TyBits;
3815 unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements();
3816 for (unsigned i = 0; i != NumElts; ++i) {
3817 if (!DemandedElts[i])
3818 continue;
3819 // If we find a non-ConstantInt, bail out.
3820 auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
3821 if (!Elt)
3822 return 0;
3823
3824 MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
3825 }
3826
3827 return MinSignBits;
3828}
3829
3830static unsigned ComputeNumSignBitsImpl(const Value *V,
3831 const APInt &DemandedElts,
3832 unsigned Depth, const SimplifyQuery &Q);
3833
3834static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3835 unsigned Depth, const SimplifyQuery &Q) {
3836 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
3837 assert(Result > 0 && "At least one sign bit needs to be present!");
3838 return Result;
3839}
3840
3841/// Return the number of times the sign bit of the register is replicated into
3842/// the other bits. We know that at least 1 bit is always equal to the sign bit
3843/// (itself), but other cases can give us information. For example, immediately
3844/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
3845/// other, so we return 3. For vectors, return the number of sign bits for the
3846/// vector element with the minimum number of known sign bits of the demanded
3847/// elements in the vector specified by DemandedElts.
3848static unsigned ComputeNumSignBitsImpl(const Value *V,
3849 const APInt &DemandedElts,
3850 unsigned Depth, const SimplifyQuery &Q) {
3851 Type *Ty = V->getType();
3852#ifndef NDEBUG
3853 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3854
3855 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3856 assert(
3857 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3858 "DemandedElt width should equal the fixed vector number of elements");
3859 } else {
3860 assert(DemandedElts == APInt(1, 1) &&
3861 "DemandedElt width should be 1 for scalars");
3862 }
3863#endif
3864
3865 // We return the minimum number of sign bits that are guaranteed to be present
3866 // in V, so for undef we have to conservatively return 1. We don't have the
3867 // same behavior for poison though -- that's a FIXME today.
3868
3869 Type *ScalarTy = Ty->getScalarType();
3870 unsigned TyBits = ScalarTy->isPointerTy() ?
3871 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
3872 Q.DL.getTypeSizeInBits(ScalarTy);
3873
3874 unsigned Tmp, Tmp2;
3875 unsigned FirstAnswer = 1;
3876
3877 // Note that ConstantInt is handled by the general computeKnownBits case
3878 // below.
3879
3880 if (isa<ScalableVectorType>(Ty))
3881 return 1;
3882
3883 if (auto *U = dyn_cast<Operator>(V)) {
3884 switch (Operator::getOpcode(V)) {
3885 default: break;
3886 case Instruction::SExt:
3887 Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
3888 return ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q) +
3889 Tmp;
3890
3891 case Instruction::SDiv: {
3892 const APInt *Denominator;
3893 // sdiv X, C -> adds log(C) sign bits.
3894 if (match(U->getOperand(1), m_APInt(Denominator))) {
3895
3896 // Ignore non-positive denominator.
3897 if (!Denominator->isStrictlyPositive())
3898 break;
3899
3900 // Calculate the incoming numerator bits.
3901 unsigned NumBits =
3902 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3903
3904 // Add floor(log(C)) bits to the numerator bits.
3905 return std::min(TyBits, NumBits + Denominator->logBase2());
3906 }
3907 break;
3908 }
3909
3910 case Instruction::SRem: {
3911 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3912
3913 const APInt *Denominator;
3914 // srem X, C -> we know that the result is within [-C+1,C) when C is a
3915 // positive constant. This lets us put a lower bound on the number of sign
3916 // bits.
3917 if (match(U->getOperand(1), m_APInt(Denominator))) {
3918
3919 // Ignore non-positive denominator.
3920 if (Denominator->isStrictlyPositive()) {
3921 // Calculate the leading sign bit constraints by examining the
3922 // denominator. Given that the denominator is positive, there are two
3923 // cases:
3924 //
3925 // 1. The numerator is positive. The result range is [0,C) and
3926 // [0,C) u< (1 << ceilLogBase2(C)).
3927 //
3928 // 2. The numerator is negative. Then the result range is (-C,0] and
3929 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
3930 //
3931 // Thus a lower bound on the number of sign bits is `TyBits -
3932 // ceilLogBase2(C)`.
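//
// For example, `srem i32 %x, 8` produces a result in (-8, 8), so at least
// 32 - ceilLogBase2(8) = 29 sign bits are guaranteed.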
3933
3934 unsigned ResBits = TyBits - Denominator->ceilLogBase2();
3935 Tmp = std::max(Tmp, ResBits);
3936 }
3937 }
3938 return Tmp;
3939 }
3940
3941 case Instruction::AShr: {
3942 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3943 // ashr X, C -> adds C sign bits. Vectors too.
3944 const APInt *ShAmt;
3945 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3946 if (ShAmt->uge(TyBits))
3947 break; // Bad shift.
3948 unsigned ShAmtLimited = ShAmt->getZExtValue();
3949 Tmp += ShAmtLimited;
3950 if (Tmp > TyBits) Tmp = TyBits;
3951 }
3952 return Tmp;
3953 }
3954 case Instruction::Shl: {
3955 const APInt *ShAmt;
3956 Value *X = nullptr;
3957 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3958 // shl destroys sign bits.
3959 if (ShAmt->uge(TyBits))
3960 break; // Bad shift.
3961 // We can look through a zext (more or less treating it as a sext) if
3962 // all extended bits are shifted out.
3963 if (match(U->getOperand(0), m_ZExt(m_Value(X))) &&
3964 ShAmt->uge(TyBits - X->getType()->getScalarSizeInBits())) {
3965 Tmp = ComputeNumSignBits(X, DemandedElts, Depth + 1, Q);
3966 Tmp += TyBits - X->getType()->getScalarSizeInBits();
3967 } else
3968 Tmp =
3969 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3970 if (ShAmt->uge(Tmp))
3971 break; // Shifted all sign bits out.
3972 Tmp2 = ShAmt->getZExtValue();
3973 return Tmp - Tmp2;
3974 }
3975 break;
3976 }
3977 case Instruction::And:
3978 case Instruction::Or:
3979 case Instruction::Xor: // NOT is handled here.
3980 // Logical binary ops preserve the number of sign bits at the worst.
3981 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3982 if (Tmp != 1) {
3983 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
3984 FirstAnswer = std::min(Tmp, Tmp2);
3985 // We computed what we know about the sign bits as our first
3986 // answer. Now proceed to the generic code that uses
3987 // computeKnownBits, and pick whichever answer is better.
3988 }
3989 break;
3990
3991 case Instruction::Select: {
3992 // If we have a clamp pattern, we know that the number of sign bits will
3993 // be the minimum of the clamp min/max range.
3994 const Value *X;
3995 const APInt *CLow, *CHigh;
3996 if (isSignedMinMaxClamp(U, X, CLow, CHigh))
3997 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
3998
3999 Tmp = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4000 if (Tmp == 1)
4001 break;
4002 Tmp2 = ComputeNumSignBits(U->getOperand(2), DemandedElts, Depth + 1, Q);
4003 return std::min(Tmp, Tmp2);
4004 }
4005
4006 case Instruction::Add:
4007 // Add can have at most one carry bit. Thus we know that the output
4008 // is, at worst, one more bit than the inputs.
4009 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4010 if (Tmp == 1) break;
4011
4012 // Special case decrementing a value (ADD X, -1):
4013 if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
4014 if (CRHS->isAllOnesValue()) {
4015 KnownBits Known(TyBits);
4016 computeKnownBits(U->getOperand(0), DemandedElts, Known, Depth + 1, Q);
4017
4018 // If the input is known to be 0 or 1, the output is 0/-1, which is
4019 // all sign bits set.
4020 if ((Known.Zero | 1).isAllOnes())
4021 return TyBits;
4022
4023 // If we are subtracting one from a positive number, there is no carry
4024 // out of the result.
4025 if (Known.isNonNegative())
4026 return Tmp;
4027 }
4028
4029 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4030 if (Tmp2 == 1)
4031 break;
4032 return std::min(Tmp, Tmp2) - 1;
4033
4034 case Instruction::Sub:
4035 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4036 if (Tmp2 == 1)
4037 break;
4038
4039 // Handle NEG.
4040 if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
4041 if (CLHS->isNullValue()) {
4042 KnownBits Known(TyBits);
4043 computeKnownBits(U->getOperand(1), DemandedElts, Known, Depth + 1, Q);
4044 // If the input is known to be 0 or 1, the output is 0/-1, which is
4045 // all sign bits set.
4046 if ((Known.Zero | 1).isAllOnes())
4047 return TyBits;
4048
4049 // If the input is known to be positive (the sign bit is known clear),
4050 // the output of the NEG has the same number of sign bits as the
4051 // input.
4052 if (Known.isNonNegative())
4053 return Tmp2;
4054
4055 // Otherwise, we treat this like a SUB.
4056 }
4057
4058 // Sub can have at most one carry bit. Thus we know that the output
4059 // is, at worst, one more bit than the inputs.
4060 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4061 if (Tmp == 1)
4062 break;
4063 return std::min(Tmp, Tmp2) - 1;
4064
4065 case Instruction::Mul: {
4066 // The output of the Mul can be at most twice the valid bits in the
4067 // inputs.
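// For example, two i32 operands with 20 sign bits each carry 13 valid bits
// each, so the product has at most 26 valid bits and therefore at least
// 32 - 26 + 1 = 7 sign bits.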
4068 unsigned SignBitsOp0 =
4069 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4070 if (SignBitsOp0 == 1)
4071 break;
4072 unsigned SignBitsOp1 =
4073 ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4074 if (SignBitsOp1 == 1)
4075 break;
4076 unsigned OutValidBits =
4077 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
4078 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
4079 }
4080
4081 case Instruction::PHI: {
4082 const PHINode *PN = cast<PHINode>(U);
4083 unsigned NumIncomingValues = PN->getNumIncomingValues();
4084 // Don't analyze large in-degree PHIs.
4085 if (NumIncomingValues > 4) break;
4086 // Unreachable blocks may have zero-operand PHI nodes.
4087 if (NumIncomingValues == 0) break;
4088
4089 // Take the minimum of all incoming values. This can't infinitely loop
4090 // because of our depth threshold.
4091 SimplifyQuery RecQ = Q.getWithoutCondContext();
4092 Tmp = TyBits;
4093 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
4094 if (Tmp == 1) return Tmp;
4095 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
4096 Tmp = std::min(Tmp, ComputeNumSignBits(PN->getIncomingValue(i),
4097 DemandedElts, Depth + 1, RecQ));
4098 }
4099 return Tmp;
4100 }
4101
4102 case Instruction::Trunc: {
4103 // If the input contained enough sign bits that some remain after the
4104 // truncation, then we can make use of that. Otherwise we don't know
4105 // anything.
4106 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4107 unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits();
4108 if (Tmp > (OperandTyBits - TyBits))
4109 return Tmp - (OperandTyBits - TyBits);
4110
4111 return 1;
4112 }
4113
4114 case Instruction::ExtractElement:
4115 // Look through extract element. At the moment we keep this simple and
4116 // skip tracking the specific element. But at least we might find
4117 // information valid for all elements of the vector (for example, if the
4118 // vector is sign extended, shifted, etc.).
4119 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4120
4121 case Instruction::ShuffleVector: {
4122 // Collect the minimum number of sign bits that are shared by every vector
4123 // element referenced by the shuffle.
4124 auto *Shuf = dyn_cast<ShuffleVectorInst>(U);
4125 if (!Shuf) {
4126 // FIXME: Add support for shufflevector constant expressions.
4127 return 1;
4128 }
4129 APInt DemandedLHS, DemandedRHS;
4130 // For undef elements, we don't know anything about the common state of
4131 // the shuffle result.
4132 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
4133 return 1;
4134 Tmp = std::numeric_limits<unsigned>::max();
4135 if (!!DemandedLHS) {
4136 const Value *LHS = Shuf->getOperand(0);
4137 Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q);
4138 }
4139 // If we don't know anything, early out and try computeKnownBits
4140 // fall-back.
4141 if (Tmp == 1)
4142 break;
4143 if (!!DemandedRHS) {
4144 const Value *RHS = Shuf->getOperand(1);
4145 Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q);
4146 Tmp = std::min(Tmp, Tmp2);
4147 }
4148 // If we don't know anything, early out and try computeKnownBits
4149 // fall-back.
4150 if (Tmp == 1)
4151 break;
4152 assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
4153 return Tmp;
4154 }
4155 case Instruction::Call: {
4156 if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
4157 switch (II->getIntrinsicID()) {
4158 default:
4159 break;
4160 case Intrinsic::abs:
4161 Tmp =
4162 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4163 if (Tmp == 1)
4164 break;
4165
4166 // Absolute value reduces number of sign bits by at most 1.
4167 return Tmp - 1;
4168 case Intrinsic::smin:
4169 case Intrinsic::smax: {
4170 const APInt *CLow, *CHigh;
4171 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
4172 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
4173 }
4174 }
4175 }
4176 }
4177 }
4178 }
4179
4180 // Finally, if we can prove that the top bits of the result are 0's or 1's,
4181 // use this information.
4182
4183 // If we can examine all elements of a vector constant successfully, we're
4184 // done (we can't do any better than that). If not, keep trying.
4185 if (unsigned VecSignBits =
4186 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
4187 return VecSignBits;
4188
4189 KnownBits Known(TyBits);
4190 computeKnownBits(V, DemandedElts, Known, Depth, Q);
4191
4192 // If we know that the sign bit is either zero or one, determine the number of
4193 // identical bits in the top of the input value.
4194 return std::max(FirstAnswer, Known.countMinSignBits());
4195}
4196
4197Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
4198 const TargetLibraryInfo *TLI) {
4199 const Function *F = CB.getCalledFunction();
4200 if (!F)
4201 return Intrinsic::not_intrinsic;
4202
4203 if (F->isIntrinsic())
4204 return F->getIntrinsicID();
4205
4206 // We are going to infer semantics of a library function based on mapping it
4207 // to an LLVM intrinsic. Check that the library function is available from
4208 // this callbase and in this environment.
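// For example, a read-only call to `sinf` that the TLI recognizes is treated
// as `llvm.sin`.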
4209 LibFunc Func;
4210 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) ||
4211 !CB.onlyReadsMemory())
4212 return Intrinsic::not_intrinsic;
4213
4214 switch (Func) {
4215 default:
4216 break;
4217 case LibFunc_sin:
4218 case LibFunc_sinf:
4219 case LibFunc_sinl:
4220 return Intrinsic::sin;
4221 case LibFunc_cos:
4222 case LibFunc_cosf:
4223 case LibFunc_cosl:
4224 return Intrinsic::cos;
4225 case LibFunc_tan:
4226 case LibFunc_tanf:
4227 case LibFunc_tanl:
4228 return Intrinsic::tan;
4229 case LibFunc_asin:
4230 case LibFunc_asinf:
4231 case LibFunc_asinl:
4232 return Intrinsic::asin;
4233 case LibFunc_acos:
4234 case LibFunc_acosf:
4235 case LibFunc_acosl:
4236 return Intrinsic::acos;
4237 case LibFunc_atan:
4238 case LibFunc_atanf:
4239 case LibFunc_atanl:
4240 return Intrinsic::atan;
4241 case LibFunc_atan2:
4242 case LibFunc_atan2f:
4243 case LibFunc_atan2l:
4244 return Intrinsic::atan2;
4245 case LibFunc_sinh:
4246 case LibFunc_sinhf:
4247 case LibFunc_sinhl:
4248 return Intrinsic::sinh;
4249 case LibFunc_cosh:
4250 case LibFunc_coshf:
4251 case LibFunc_coshl:
4252 return Intrinsic::cosh;
4253 case LibFunc_tanh:
4254 case LibFunc_tanhf:
4255 case LibFunc_tanhl:
4256 return Intrinsic::tanh;
4257 case LibFunc_exp:
4258 case LibFunc_expf:
4259 case LibFunc_expl:
4260 return Intrinsic::exp;
4261 case LibFunc_exp2:
4262 case LibFunc_exp2f:
4263 case LibFunc_exp2l:
4264 return Intrinsic::exp2;
4265 case LibFunc_exp10:
4266 case LibFunc_exp10f:
4267 case LibFunc_exp10l:
4268 return Intrinsic::exp10;
4269 case LibFunc_log:
4270 case LibFunc_logf:
4271 case LibFunc_logl:
4272 return Intrinsic::log;
4273 case LibFunc_log10:
4274 case LibFunc_log10f:
4275 case LibFunc_log10l:
4276 return Intrinsic::log10;
4277 case LibFunc_log2:
4278 case LibFunc_log2f:
4279 case LibFunc_log2l:
4280 return Intrinsic::log2;
4281 case LibFunc_fabs:
4282 case LibFunc_fabsf:
4283 case LibFunc_fabsl:
4284 return Intrinsic::fabs;
4285 case LibFunc_fmin:
4286 case LibFunc_fminf:
4287 case LibFunc_fminl:
4288 return Intrinsic::minnum;
4289 case LibFunc_fmax:
4290 case LibFunc_fmaxf:
4291 case LibFunc_fmaxl:
4292 return Intrinsic::maxnum;
4293 case LibFunc_copysign:
4294 case LibFunc_copysignf:
4295 case LibFunc_copysignl:
4296 return Intrinsic::copysign;
4297 case LibFunc_floor:
4298 case LibFunc_floorf:
4299 case LibFunc_floorl:
4300 return Intrinsic::floor;
4301 case LibFunc_ceil:
4302 case LibFunc_ceilf:
4303 case LibFunc_ceill:
4304 return Intrinsic::ceil;
4305 case LibFunc_trunc:
4306 case LibFunc_truncf:
4307 case LibFunc_truncl:
4308 return Intrinsic::trunc;
4309 case LibFunc_rint:
4310 case LibFunc_rintf:
4311 case LibFunc_rintl:
4312 return Intrinsic::rint;
4313 case LibFunc_nearbyint:
4314 case LibFunc_nearbyintf:
4315 case LibFunc_nearbyintl:
4316 return Intrinsic::nearbyint;
4317 case LibFunc_round:
4318 case LibFunc_roundf:
4319 case LibFunc_roundl:
4320 return Intrinsic::round;
4321 case LibFunc_roundeven:
4322 case LibFunc_roundevenf:
4323 case LibFunc_roundevenl:
4324 return Intrinsic::roundeven;
4325 case LibFunc_pow:
4326 case LibFunc_powf:
4327 case LibFunc_powl:
4328 return Intrinsic::pow;
4329 case LibFunc_sqrt:
4330 case LibFunc_sqrtf:
4331 case LibFunc_sqrtl:
4332 return Intrinsic::sqrt;
4333 }
4334
4335 return Intrinsic::not_intrinsic;
4336}
4337
4338/// Return true if it's possible to assume IEEE treatment of input denormals in
4339/// \p F for \p Ty.
4340static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
4341 Ty = Ty->getScalarType();
4342 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
4343}
4344
4345static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4346 Ty = Ty->getScalarType();
4347 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4348 return Mode.Input == DenormalMode::IEEE ||
4349 Mode.Input == DenormalMode::PositiveZero;
4350}
4351
4352static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4353 Ty = Ty->getScalarType();
4354 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4355 return Mode.Output == DenormalMode::IEEE ||
4356 Mode.Output == DenormalMode::PositiveZero;
4357}
4358
4359bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const {
4360 return isKnownNeverZero() &&
4361 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty));
4362}
4363
4364bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F,
4365 Type *Ty) const {
4366 return isKnownNeverNegZero() &&
4367 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty));
4368}
4369
4370bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F,
4371 Type *Ty) const {
4372 if (!isKnownNeverPosZero())
4373 return false;
4374
4375 // If we know there are no denormals, nothing can be flushed to zero.
4376 if (isKnownNeverSubnormal())
4377 return true;
4378
4379 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4380 switch (Mode.Input) {
4381 case DenormalMode::IEEE:
4382 return true;
4383 case DenormalMode::PreserveSign:
4384 // Negative subnormal won't flush to +0
4385 return isKnownNeverPosSubnormal();
4386 case DenormalMode::PositiveZero:
4387 default:
4388 // Both positive and negative subnormal could flush to +0
4389 return false;
4390 }
4391
4392 llvm_unreachable("covered switch over denormal mode");
4393}
4394
4395void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F,
4396 Type *Ty) {
4397 KnownFPClasses = Src.KnownFPClasses;
4398 // If we aren't already assuming the source can't be zero, there is nothing to
4399 // gain from checking whether a denormal input could be flushed to zero.
4400 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero())
4401 return;
4402
4403 // If we know the input can't be a denormal, it can't be flushed to 0.
4404 if (Src.isKnownNeverSubnormal())
4405 return;
4406
4407 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4408
4409 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE())
4410 KnownFPClasses |= fcPosZero;
4411
4412 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) {
4413 if (Mode != DenormalMode::getPositiveZero())
4414 KnownFPClasses |= fcNegZero;
4415
4416 if (Mode.Input == DenormalMode::PositiveZero ||
4417 Mode.Output == DenormalMode::PositiveZero ||
4418 Mode.Input == DenormalMode::Dynamic ||
4419 Mode.Output == DenormalMode::Dynamic)
4420 KnownFPClasses |= fcPosZero;
4421 }
4422}
4423
4424void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src,
4425 const Function &F, Type *Ty) {
4426 propagateDenormal(Src, F, Ty);
4427 propagateNaN(Src, /*PreserveSign=*/true);
4428}
4429
4430/// Given an exploded icmp instruction, return true if the comparison only
4431/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if
4432/// the result of the comparison is true when the input value is signed.
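///
/// For example, `icmp slt i32 %x, 0` and `icmp ugt i32 %x, 2147483647` both
/// test only the sign bit of %x, and both set TrueIfSigned to true.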
4433bool llvm::isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
4434 bool &TrueIfSigned) {
4435 switch (Pred) {
4436 case ICmpInst::ICMP_SLT: // True if LHS s< 0
4437 TrueIfSigned = true;
4438 return RHS.isZero();
4439 case ICmpInst::ICMP_SLE: // True if LHS s<= -1
4440 TrueIfSigned = true;
4441 return RHS.isAllOnes();
4442 case ICmpInst::ICMP_SGT: // True if LHS s> -1
4443 TrueIfSigned = false;
4444 return RHS.isAllOnes();
4445 case ICmpInst::ICMP_SGE: // True if LHS s>= 0
4446 TrueIfSigned = false;
4447 return RHS.isZero();
4448 case ICmpInst::ICMP_UGT:
4449 // True if LHS u> RHS and RHS == sign-bit-mask - 1
4450 TrueIfSigned = true;
4451 return RHS.isMaxSignedValue();
4452 case ICmpInst::ICMP_UGE:
4453 // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4454 TrueIfSigned = true;
4455 return RHS.isMinSignedValue();
4456 case ICmpInst::ICMP_ULT:
4457 // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4458 TrueIfSigned = false;
4459 return RHS.isMinSignedValue();
4460 case ICmpInst::ICMP_ULE:
4461 // True if LHS u<= RHS and RHS == sign-bit-mask - 1
4462 TrueIfSigned = false;
4463 return RHS.isMaxSignedValue();
4464 default:
4465 return false;
4466 }
4467}
4468
4469/// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
4470/// same result as an fcmp with the given operands.
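///
/// For example, `fcmp oeq float %x, 0.0` gives the same result as
/// `llvm.is.fpclass(%x, fcZero)`, i.e. a test for positive or negative zero.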
4471std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
4472 const Function &F,
4473 Value *LHS, Value *RHS,
4474 bool LookThroughSrc) {
4475 const APFloat *ConstRHS;
4476 if (!match(RHS, m_APFloatAllowPoison(ConstRHS)))
4477 return {nullptr, fcAllFlags};
4478
4479 return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc);
4480}