1 //===- ValueTracking.cpp - Walk computations to compute properties --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains routines that help analyze properties that chains of
10 // computations have.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Analysis/ValueTracking.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/iterator_range.h"
26 #include "llvm/Analysis/AliasAnalysis.h"
27 #include "llvm/Analysis/AssumeBundleQueries.h"
28 #include "llvm/Analysis/AssumptionCache.h"
29 #include "llvm/Analysis/ConstantFolding.h"
30 #include "llvm/Analysis/EHPersonalities.h"
31 #include "llvm/Analysis/GuardUtils.h"
32 #include "llvm/Analysis/InstructionSimplify.h"
33 #include "llvm/Analysis/Loads.h"
34 #include "llvm/Analysis/LoopInfo.h"
35 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
36 #include "llvm/Analysis/TargetLibraryInfo.h"
37 #include "llvm/IR/Argument.h"
38 #include "llvm/IR/Attributes.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constant.h"
41 #include "llvm/IR/ConstantRange.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/GetElementPtrTypeIterator.h"
48 #include "llvm/IR/GlobalAlias.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/GlobalVariable.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/IntrinsicsAArch64.h"
57 #include "llvm/IR/IntrinsicsRISCV.h"
58 #include "llvm/IR/IntrinsicsX86.h"
59 #include "llvm/IR/LLVMContext.h"
60 #include "llvm/IR/Metadata.h"
61 #include "llvm/IR/Module.h"
62 #include "llvm/IR/Operator.h"
63 #include "llvm/IR/PatternMatch.h"
64 #include "llvm/IR/Type.h"
65 #include "llvm/IR/User.h"
66 #include "llvm/IR/Value.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/Compiler.h"
70 #include "llvm/Support/ErrorHandling.h"
71 #include "llvm/Support/KnownBits.h"
72 #include "llvm/Support/MathExtras.h"
73 #include <algorithm>
74 #include <cassert>
75 #include <cstdint>
76 #include <utility>
77 
78 using namespace llvm;
79 using namespace llvm::PatternMatch;
80 
81 // Controls the number of uses of the value searched for possible
82 // dominating comparisons.
83 static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
84  cl::Hidden, cl::init(20));
85 
86 // According to the LangRef, branching on a poison condition is absolutely
87 // immediate full UB. However, historically we haven't implemented that
88 // consistently as we had an important transformation (non-trivial unswitch)
89 // which introduced instances of branch on poison/undef to otherwise well
90 // defined programs. This issue has since been fixed, but the flag is
91 // temporarily retained to easily diagnose potential regressions.
92 static cl::opt<bool> BranchOnPoisonAsUB("branch-on-poison-as-ub",
93  cl::Hidden, cl::init(true));
94 
95 
96 /// Returns the bitwidth of the given scalar or pointer type. For vector types,
97 /// returns the element type's bitwidth.
98 static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
99  if (unsigned BitWidth = Ty->getScalarSizeInBits())
100  return BitWidth;
101 
102  return DL.getPointerTypeSizeInBits(Ty);
103 }
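// As an illustration: for i64 and <4 x i32> this returns 64 and 32
// respectively; a pointer (or vector of pointers) has a scalar size of 0, so
// the DataLayout pointer size is used instead.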
104 
105 namespace {
106 
107 // Simplifying using an assume can only be done in a particular control-flow
108 // context (the context instruction provides that context). If an assume and
109 // the context instruction are not in the same block then the DT helps in
110 // figuring out if we can use it.
111 struct Query {
112  const DataLayout &DL;
113  AssumptionCache *AC;
114  const Instruction *CxtI;
115  const DominatorTree *DT;
116 
117  // Unlike the other analyses, this may be a nullptr because not all clients
118  // provide it currently.
119  OptimizationRemarkEmitter *ORE;
120 
121  /// If true, it is safe to use metadata during simplification.
122  InstrInfoQuery IIQ;
123 
124  Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
125  const DominatorTree *DT, bool UseInstrInfo,
126  OptimizationRemarkEmitter *ORE = nullptr)
127  : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}
128 };
129 
130 } // end anonymous namespace
131 
132 // Given the provided Value and, potentially, a context instruction, return
133 // the preferred context instruction (if any).
134 static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
135  // If we've been provided with a context instruction, then use that (provided
136  // it has been inserted).
137  if (CxtI && CxtI->getParent())
138  return CxtI;
139 
140  // If the value is really an already-inserted instruction, then use that.
141  CxtI = dyn_cast<Instruction>(V);
142  if (CxtI && CxtI->getParent())
143  return CxtI;
144 
145  return nullptr;
146 }
147 
148 static const Instruction *safeCxtI(const Value *V1, const Value *V2, const Instruction *CxtI) {
149  // If we've been provided with a context instruction, then use that (provided
150  // it has been inserted).
151  if (CxtI && CxtI->getParent())
152  return CxtI;
153 
154  // If the value is really an already-inserted instruction, then use that.
155  CxtI = dyn_cast<Instruction>(V1);
156  if (CxtI && CxtI->getParent())
157  return CxtI;
158 
159  CxtI = dyn_cast<Instruction>(V2);
160  if (CxtI && CxtI->getParent())
161  return CxtI;
162 
163  return nullptr;
164 }
165 
166 static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
167  const APInt &DemandedElts,
168  APInt &DemandedLHS, APInt &DemandedRHS) {
169  // The length of scalable vectors is unknown at compile time, thus we
170  // cannot check their values
171  if (isa<ScalableVectorType>(Shuf->getType()))
172  return false;
173 
174  int NumElts =
175  cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
176  int NumMaskElts = cast<FixedVectorType>(Shuf->getType())->getNumElements();
177  DemandedLHS = DemandedRHS = APInt::getZero(NumElts);
178  if (DemandedElts.isZero())
179  return true;
180  // Simple case of a shuffle with zeroinitializer.
181  if (all_of(Shuf->getShuffleMask(), [](int Elt) { return Elt == 0; })) {
182  DemandedLHS.setBit(0);
183  return true;
184  }
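  // As an illustration: shufflevector <4 x i32> %a, <4 x i32> %b,
  // <4 x i32> zeroinitializer broadcasts element 0 of %a, so only bit 0 of
  // DemandedLHS needs to be set, regardless of which result elements are
  // demanded.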
185  for (int i = 0; i != NumMaskElts; ++i) {
186  if (!DemandedElts[i])
187  continue;
188  int M = Shuf->getMaskValue(i);
189  assert(M < (NumElts * 2) && "Invalid shuffle mask constant");
190 
191  // For undef elements, we don't know anything about the common state of
192  // the shuffle result.
193  if (M == -1)
194  return false;
195  if (M < NumElts)
196  DemandedLHS.setBit(M % NumElts);
197  else
198  DemandedRHS.setBit(M % NumElts);
199  }
200 
201  return true;
202 }
203 
204 static void computeKnownBits(const Value *V, const APInt &DemandedElts,
205  KnownBits &Known, unsigned Depth, const Query &Q);
206 
207 static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
208  const Query &Q) {
209  // FIXME: We currently have no way to represent the DemandedElts of a scalable
210  // vector
211  if (isa<ScalableVectorType>(V->getType())) {
212  Known.resetAll();
213  return;
214  }
215 
216  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
217  APInt DemandedElts =
218  FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
219  computeKnownBits(V, DemandedElts, Known, Depth, Q);
220 }
221 
222 void llvm::computeKnownBits(const Value *V, KnownBits &Known,
223  const DataLayout &DL, unsigned Depth,
224  AssumptionCache *AC, const Instruction *CxtI,
225  const DominatorTree *DT,
226  OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
227  ::computeKnownBits(V, Known, Depth,
228  Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
229 }
230 
231 void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
232  KnownBits &Known, const DataLayout &DL,
233  unsigned Depth, AssumptionCache *AC,
234  const Instruction *CxtI, const DominatorTree *DT,
235  OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
236  ::computeKnownBits(V, DemandedElts, Known, Depth,
237  Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
238 }
239 
240 static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
241  unsigned Depth, const Query &Q);
242 
243 static KnownBits computeKnownBits(const Value *V, unsigned Depth,
244  const Query &Q);
245 
246 KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
247  unsigned Depth, AssumptionCache *AC,
248  const Instruction *CxtI,
249  const DominatorTree *DT,
250  OptimizationRemarkEmitter *ORE,
251  bool UseInstrInfo) {
252  return ::computeKnownBits(
253  V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
254 }
255 
256 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
257  const DataLayout &DL, unsigned Depth,
258  AssumptionCache *AC, const Instruction *CxtI,
259  const DominatorTree *DT,
260  OptimizationRemarkEmitter *ORE,
261  bool UseInstrInfo) {
262  return ::computeKnownBits(
263  V, DemandedElts, Depth,
264  Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
265 }
266 
267 bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
268  const DataLayout &DL, AssumptionCache *AC,
269  const Instruction *CxtI, const DominatorTree *DT,
270  bool UseInstrInfo) {
271  assert(LHS->getType() == RHS->getType() &&
272  "LHS and RHS should have the same type");
274  "LHS and RHS should be integers");
275  // Look for an inverted mask: (X & ~M) op (Y & M).
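  // As an illustration, with M = 0x0F the operands are X & 0xF0 and Y & 0x0F,
  // which can never have a bit set in the same position.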
276  {
277  Value *M;
278  if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
279  match(RHS, m_c_And(m_Specific(M), m_Value())))
280  return true;
281  if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
282  match(LHS, m_c_And(m_Specific(M), m_Value())))
283  return true;
284  }
285 
286  // X op (Y & ~X)
287  if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) ||
288  match(LHS, m_c_And(m_Not(m_Specific(RHS)), m_Value())))
289  return true;
290 
291  // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
292  // for constant Y.
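  // As an illustration: (X & Y) ^ Y == ~X & Y. With X = 0b0101 and Y = 0b0011
  // this gives 0b0001 ^ 0b0011 == 0b0010, which shares no set bits with X.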
293  Value *Y;
294  if (match(RHS,
295  m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) ||
296  match(LHS, m_c_Xor(m_c_And(m_Specific(RHS), m_Value(Y)), m_Deferred(Y))))
297  return true;
298 
299  // Look for: (A & B) op ~(A | B)
300  {
301  Value *A, *B;
302  if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
303  match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
304  return true;
305  if (match(RHS, m_And(m_Value(A), m_Value(B))) &&
306  match(LHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
307  return true;
308  }
309  IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
310  KnownBits LHSKnown(IT->getBitWidth());
311  KnownBits RHSKnown(IT->getBitWidth());
312  computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
313  computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
314  return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown);
315 }
316 
317 bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
318  return !I->user_empty() && all_of(I->users(), [](const User *U) {
319  ICmpInst::Predicate P;
320  return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
321  });
322 }
323 
324 static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
325  const Query &Q);
326 
327 bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
328  bool OrZero, unsigned Depth,
329  AssumptionCache *AC, const Instruction *CxtI,
330  const DominatorTree *DT, bool UseInstrInfo) {
331  return ::isKnownToBeAPowerOfTwo(
332  V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
333 }
334 
335 static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
336  unsigned Depth, const Query &Q);
337 
338 static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);
339 
340 bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
341  AssumptionCache *AC, const Instruction *CxtI,
342  const DominatorTree *DT, bool UseInstrInfo) {
343  return ::isKnownNonZero(V, Depth,
344  Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
345 }
346 
347 bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
348  unsigned Depth, AssumptionCache *AC,
349  const Instruction *CxtI, const DominatorTree *DT,
350  bool UseInstrInfo) {
351  KnownBits Known =
352  computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
353  return Known.isNonNegative();
354 }
355 
356 bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
357  AssumptionCache *AC, const Instruction *CxtI,
358  const DominatorTree *DT, bool UseInstrInfo) {
359  if (auto *CI = dyn_cast<ConstantInt>(V))
360  return CI->getValue().isStrictlyPositive();
361 
362  // TODO: We're doing two recursive queries here. We should factor this such
363  // that only a single query is needed.
364  return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) &&
365  isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
366 }
367 
368 bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
369  AssumptionCache *AC, const Instruction *CxtI,
370  const DominatorTree *DT, bool UseInstrInfo) {
371  KnownBits Known =
372  computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
373  return Known.isNegative();
374 }
375 
376 static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
377  const Query &Q);
378 
379 bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
380  const DataLayout &DL, AssumptionCache *AC,
381  const Instruction *CxtI, const DominatorTree *DT,
382  bool UseInstrInfo) {
383  return ::isKnownNonEqual(V1, V2, 0,
384  Query(DL, AC, safeCxtI(V2, V1, CxtI), DT,
385  UseInstrInfo, /*ORE=*/nullptr));
386 }
387 
388 static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
389  const Query &Q);
390 
391 bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
392  const DataLayout &DL, unsigned Depth,
393  AssumptionCache *AC, const Instruction *CxtI,
394  const DominatorTree *DT, bool UseInstrInfo) {
395  return ::MaskedValueIsZero(
396  V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
397 }
398 
399 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
400  unsigned Depth, const Query &Q);
401 
402 static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
403  const Query &Q) {
404  // FIXME: We currently have no way to represent the DemandedElts of a scalable
405  // vector
406  if (isa<ScalableVectorType>(V->getType()))
407  return 1;
408 
409  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
410  APInt DemandedElts =
411  FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
412  return ComputeNumSignBits(V, DemandedElts, Depth, Q);
413 }
414 
415 unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
416  unsigned Depth, AssumptionCache *AC,
417  const Instruction *CxtI,
418  const DominatorTree *DT, bool UseInstrInfo) {
419  return ::ComputeNumSignBits(
420  V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
421 }
422 
423 unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
424  unsigned Depth, AssumptionCache *AC,
425  const Instruction *CxtI,
426  const DominatorTree *DT) {
427  unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
428  return V->getType()->getScalarSizeInBits() - SignBits + 1;
429 }
430 
431 static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
432  bool NSW, const APInt &DemandedElts,
433  KnownBits &KnownOut, KnownBits &Known2,
434  unsigned Depth, const Query &Q) {
435  computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);
436 
437  // If one operand is unknown and we have no nowrap information,
438  // the result will be unknown independently of the second operand.
439  if (KnownOut.isUnknown() && !NSW)
440  return;
441 
442  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
443  KnownOut = KnownBits::computeForAddSub(Add, NSW, Known2, KnownOut);
444 }
445 
446 static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
447  const APInt &DemandedElts, KnownBits &Known,
448  KnownBits &Known2, unsigned Depth,
449  const Query &Q) {
450  computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
451  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
452 
453  bool isKnownNegative = false;
454  bool isKnownNonNegative = false;
455  // If the multiplication is known not to overflow, compute the sign bit.
456  if (NSW) {
457  if (Op0 == Op1) {
458  // The product of a number with itself is non-negative.
459  isKnownNonNegative = true;
460  } else {
461  bool isKnownNonNegativeOp1 = Known.isNonNegative();
462  bool isKnownNonNegativeOp0 = Known2.isNonNegative();
463  bool isKnownNegativeOp1 = Known.isNegative();
464  bool isKnownNegativeOp0 = Known2.isNegative();
465  // The product of two numbers with the same sign is non-negative.
466  isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
467  (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
468  // The product of a negative number and a non-negative number is either
469  // negative or zero.
470  if (!isKnownNonNegative)
471  isKnownNegative =
472  (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
473  Known2.isNonZero()) ||
474  (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
475  }
476  }
477 
478  bool SelfMultiply = Op0 == Op1;
479  // TODO: SelfMultiply can be poison, but not undef.
480  if (SelfMultiply)
481  SelfMultiply &=
482  isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
483  Known = KnownBits::mul(Known, Known2, SelfMultiply);
484 
485  // Only make use of no-wrap flags if we failed to compute the sign bit
486  // directly. This matters if the multiplication always overflows, in
487  // which case we prefer to follow the result of the direct computation,
488  // though as the program is invoking undefined behaviour we can choose
489  // whatever we like here.
490  if (isKnownNonNegative && !Known.isNegative())
491  Known.makeNonNegative();
492  else if (isKnownNegative && !Known.isNonNegative())
493  Known.makeNegative();
494 }
495 
496 void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
497  KnownBits &Known) {
498  unsigned BitWidth = Known.getBitWidth();
499  unsigned NumRanges = Ranges.getNumOperands() / 2;
500  assert(NumRanges >= 1);
501 
502  Known.Zero.setAllBits();
503  Known.One.setAllBits();
504 
505  for (unsigned i = 0; i < NumRanges; ++i) {
506  ConstantInt *Lower =
507  mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
508  ConstantInt *Upper =
509  mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
510  ConstantRange Range(Lower->getValue(), Upper->getValue());
511 
512  // The first CommonPrefixBits of all values in Range are equal.
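  // As an illustration, for an i8 range [0x40, 0x48) the unsigned min and max
  // are 0x40 and 0x47; their xor is 0x07, so the top 5 bits (01000) are common
  // to every value in the range and become known.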
513  unsigned CommonPrefixBits =
514  (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
515  APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
516  APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
517  Known.One &= UnsignedMax & Mask;
518  Known.Zero &= ~UnsignedMax & Mask;
519  }
520 }
521 
522 static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
523  SmallVector<const Value *, 16> WorkSet(1, I);
524  SmallPtrSet<const Value *, 32> Visited;
525  SmallPtrSet<const Value *, 16> EphValues;
526 
527  // The instruction defining an assumption's condition itself is always
528  // considered ephemeral to that assumption (even if it has other
529  // non-ephemeral users). See r246696's test case for an example.
530  if (is_contained(I->operands(), E))
531  return true;
532 
533  while (!WorkSet.empty()) {
534  const Value *V = WorkSet.pop_back_val();
535  if (!Visited.insert(V).second)
536  continue;
537 
538  // If all uses of this value are ephemeral, then so is this value.
539  if (llvm::all_of(V->users(), [&](const User *U) {
540  return EphValues.count(U);
541  })) {
542  if (V == E)
543  return true;
544 
545  if (V == I || (isa<Instruction>(V) &&
546  !cast<Instruction>(V)->mayHaveSideEffects() &&
547  !cast<Instruction>(V)->isTerminator())) {
548  EphValues.insert(V);
549  if (const User *U = dyn_cast<User>(V))
550  append_range(WorkSet, U->operands());
551  }
552  }
553  }
554 
555  return false;
556 }
557 
558 // Is this an intrinsic that cannot be speculated but also cannot trap?
559 bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
560  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
561  return CI->isAssumeLikeIntrinsic();
562 
563  return false;
564 }
565 
566 bool llvm::isValidAssumeForContext(const Instruction *Inv,
567  const Instruction *CxtI,
568  const DominatorTree *DT) {
569  // There are two restrictions on the use of an assume:
570  // 1. The assume must dominate the context (or the control flow must
571  // reach the assume whenever it reaches the context).
572  // 2. The context must not be in the assume's set of ephemeral values
573  // (otherwise we will use the assume to prove that the condition
574  // feeding the assume is trivially true, thus causing the removal of
575  // the assume).
576 
577  if (Inv->getParent() == CxtI->getParent()) {
578  // If Inv and CtxI are in the same block, check if the assume (Inv) is first
579  // in the BB.
580  if (Inv->comesBefore(CxtI))
581  return true;
582 
583  // Don't let an assume affect itself - this would cause the problems
584  // `isEphemeralValueOf` is trying to prevent, and it would also make
585  // the loop below go out of bounds.
586  if (Inv == CxtI)
587  return false;
588 
589  // The context comes first, but they're both in the same block.
590  // Make sure there is nothing in between that might interrupt
591  // the control flow, not even CxtI itself.
592  // We limit the scan distance between the assume and its context instruction
593  // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
594  // it can be adjusted if needed (could be turned into a cl::opt).
595  auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
596  if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
597  return false;
598 
599  return !isEphemeralValueOf(Inv, CxtI);
600  }
601 
602  // Inv and CxtI are in different blocks.
603  if (DT) {
604  if (DT->dominates(Inv, CxtI))
605  return true;
606  } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
607  // We don't have a DT, but this trivially dominates.
608  return true;
609  }
610 
611  return false;
612 }
613 
614 static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
615  // v u> y implies v != 0.
616  if (Pred == ICmpInst::ICMP_UGT)
617  return true;
618 
619  // Special-case v != 0 to also handle v != null.
620  if (Pred == ICmpInst::ICMP_NE)
621  return match(RHS, m_Zero());
622 
623  // All other predicates - rely on generic ConstantRange handling.
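  // As an illustration, "v u>= 1" yields the satisfying range [1, UINT_MAX],
  // which does not contain 0, whereas "v s> -1" yields [0, SINT_MAX], which
  // does.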
624  const APInt *C;
625  if (!match(RHS, m_APInt(C)))
626  return false;
627 
628  ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
629  return !TrueValues.contains(APInt::getZero(C->getBitWidth()));
630 }
631 
632 static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
633  // Use of assumptions is context-sensitive. If we don't have a context, we
634  // cannot use them!
635  if (!Q.AC || !Q.CxtI)
636  return false;
637 
638  if (Q.CxtI && V->getType()->isPointerTy()) {
639  SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NonNull};
640  if (!NullPointerIsDefined(Q.CxtI->getFunction(),
641  V->getType()->getPointerAddressSpace()))
642  AttrKinds.push_back(Attribute::Dereferenceable);
643 
644  if (getKnowledgeValidInContext(V, AttrKinds, Q.CxtI, Q.DT, Q.AC))
645  return true;
646  }
647 
648  for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
649  if (!AssumeVH)
650  continue;
651  CallInst *I = cast<CallInst>(AssumeVH);
652  assert(I->getFunction() == Q.CxtI->getFunction() &&
653  "Got assumption for the wrong function!");
654 
655  // Warning: This loop can end up being somewhat performance sensitive.
656  // We're running this loop once for each value queried, resulting in a
657  // runtime of ~O(#assumes * #values).
658 
659  assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
660  "must be an assume intrinsic");
661 
662  Value *RHS;
663  CmpInst::Predicate Pred;
664  auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
665  if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
666  return false;
667 
668  if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
669  return true;
670  }
671 
672  return false;
673 }
674 
675 static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
676  unsigned Depth, const Query &Q) {
677  // Use of assumptions is context-sensitive. If we don't have a context, we
678  // cannot use them!
679  if (!Q.AC || !Q.CxtI)
680  return;
681 
682  unsigned BitWidth = Known.getBitWidth();
683 
684  // Refine Known set if the pointer alignment is set by assume bundles.
685  if (V->getType()->isPointerTy()) {
686  if (RetainedKnowledge RK = getKnowledgeValidInContext(
687  V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) {
688  if (isPowerOf2_64(RK.ArgValue))
689  Known.Zero.setLowBits(Log2_64(RK.ArgValue));
690  }
691  }
692 
693  // Note that the patterns below need to be kept in sync with the code
694  // in AssumptionCache::updateAffectedValues.
695 
696  for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
697  if (!AssumeVH)
698  continue;
699  CallInst *I = cast<CallInst>(AssumeVH);
700  assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
701  "Got assumption for the wrong function!");
702 
703  // Warning: This loop can end up being somewhat performance sensitive.
704  // We're running this loop once for each value queried, resulting in a
705  // runtime of ~O(#assumes * #values).
706 
707  assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
708  "must be an assume intrinsic");
709 
710  Value *Arg = I->getArgOperand(0);
711 
712  if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
713  assert(BitWidth == 1 && "assume operand is not i1?");
714  Known.setAllOnes();
715  return;
716  }
717  if (match(Arg, m_Not(m_Specific(V))) &&
718  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
719  assert(BitWidth == 1 && "assume operand is not i1?");
720  Known.setAllZero();
721  return;
722  }
723 
724  // The remaining tests are all recursive, so bail out if we hit the limit.
725  if (Depth == MaxAnalysisRecursionDepth)
726  continue;
727 
728  ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
729  if (!Cmp)
730  continue;
731 
732  // We are attempting to compute known bits for the operands of an assume.
733  // Do not try to use other assumptions for those recursive calls because
734  // that can lead to mutual recursion and a compile-time explosion.
735  // An example of the mutual recursion: computeKnownBits can call
736  // isKnownNonZero which calls computeKnownBitsFromAssume (this function)
737  // and so on.
738  Query QueryNoAC = Q;
739  QueryNoAC.AC = nullptr;
740 
741  // Note that ptrtoint may change the bitwidth.
742  Value *A, *B;
743  auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
744 
745  CmpInst::Predicate Pred;
746  uint64_t C;
747  switch (Cmp->getPredicate()) {
748  default:
749  break;
750  case ICmpInst::ICMP_EQ:
751  // assume(v = a)
752  if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A))) &&
753  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
754  KnownBits RHSKnown =
755  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
756  Known.Zero |= RHSKnown.Zero;
757  Known.One |= RHSKnown.One;
758  // assume(v & b = a)
759  } else if (match(Cmp,
760  m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
761  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
762  KnownBits RHSKnown =
763  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
764  KnownBits MaskKnown =
765  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
766 
767  // For those bits in the mask that are known to be one, we can propagate
768  // known bits from the RHS to V.
769  Known.Zero |= RHSKnown.Zero & MaskKnown.One;
770  Known.One |= RHSKnown.One & MaskKnown.One;
771  // assume(~(v & b) = a)
772  } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
773  m_Value(A))) &&
774  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
775  KnownBits RHSKnown =
776  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
777  KnownBits MaskKnown =
778  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
779 
780  // For those bits in the mask that are known to be one, we can propagate
781  // inverted known bits from the RHS to V.
782  Known.Zero |= RHSKnown.One & MaskKnown.One;
783  Known.One |= RHSKnown.Zero & MaskKnown.One;
784  // assume(v | b = a)
785  } else if (match(Cmp,
786  m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
787  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
788  KnownBits RHSKnown =
789  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
790  KnownBits BKnown =
791  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
792 
793  // For those bits in B that are known to be zero, we can propagate known
794  // bits from the RHS to V.
795  Known.Zero |= RHSKnown.Zero & BKnown.Zero;
796  Known.One |= RHSKnown.One & BKnown.Zero;
797  // assume(~(v | b) = a)
798  } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
799  m_Value(A))) &&
800  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
801  KnownBits RHSKnown =
802  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
803  KnownBits BKnown =
804  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
805 
806  // For those bits in B that are known to be zero, we can propagate
807  // inverted known bits from the RHS to V.
808  Known.Zero |= RHSKnown.One & BKnown.Zero;
809  Known.One |= RHSKnown.Zero & BKnown.Zero;
810  // assume(v ^ b = a)
811  } else if (match(Cmp,
812  m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
813  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
814  KnownBits RHSKnown =
815  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
816  KnownBits BKnown =
817  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
818 
819  // For those bits in B that are known to be zero, we can propagate known
820  // bits from the RHS to V. For those bits in B that are known to be one,
821  // we can propagate inverted known bits from the RHS to V.
822  Known.Zero |= RHSKnown.Zero & BKnown.Zero;
823  Known.One |= RHSKnown.One & BKnown.Zero;
824  Known.Zero |= RHSKnown.One & BKnown.One;
825  Known.One |= RHSKnown.Zero & BKnown.One;
826  // assume(~(v ^ b) = a)
827  } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
828  m_Value(A))) &&
829  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
830  KnownBits RHSKnown =
831  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
832  KnownBits BKnown =
833  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
834 
835  // For those bits in B that are known to be zero, we can propagate
836  // inverted known bits from the RHS to V. For those bits in B that are
837  // known to be one, we can propagate known bits from the RHS to V.
838  Known.Zero |= RHSKnown.One & BKnown.Zero;
839  Known.One |= RHSKnown.Zero & BKnown.Zero;
840  Known.Zero |= RHSKnown.Zero & BKnown.One;
841  Known.One |= RHSKnown.One & BKnown.One;
842  // assume(v << c = a)
843  } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
844  m_Value(A))) &&
845  isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
846  KnownBits RHSKnown =
847  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
848 
849  // For those bits in RHS that are known, we can propagate them to known
850  // bits in V shifted to the right by C.
851  RHSKnown.Zero.lshrInPlace(C);
852  Known.Zero |= RHSKnown.Zero;
853  RHSKnown.One.lshrInPlace(C);
854  Known.One |= RHSKnown.One;
855  // assume(~(v << c) = a)
856  } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
857  m_Value(A))) &&
858  isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
859  KnownBits RHSKnown =
860  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
861  // For those bits in RHS that are known, we can propagate them inverted
862  // to known bits in V shifted to the right by C.
863  RHSKnown.One.lshrInPlace(C);
864  Known.Zero |= RHSKnown.One;
865  RHSKnown.Zero.lshrInPlace(C);
866  Known.One |= RHSKnown.Zero;
867  // assume(v >> c = a)
868  } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
869  m_Value(A))) &&
870  isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
871  KnownBits RHSKnown =
872  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
873  // For those bits in RHS that are known, we can propagate them to known
874  // bits in V shifted to the right by C.
875  Known.Zero |= RHSKnown.Zero << C;
876  Known.One |= RHSKnown.One << C;
877  // assume(~(v >> c) = a)
878  } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
879  m_Value(A))) &&
880  isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
881  KnownBits RHSKnown =
882  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
883  // For those bits in RHS that are known, we can propagate them inverted
884  // to known bits in V shifted to the right by C.
885  Known.Zero |= RHSKnown.One << C;
886  Known.One |= RHSKnown.Zero << C;
887  }
888  break;
889  case ICmpInst::ICMP_SGE:
890  // assume(v >=_s c) where c is non-negative
891  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
892  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
893  KnownBits RHSKnown =
894  computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth);
895 
896  if (RHSKnown.isNonNegative()) {
897  // We know that the sign bit is zero.
898  Known.makeNonNegative();
899  }
900  }
901  break;
902  case ICmpInst::ICMP_SGT:
903  // assume(v >_s c) where c is at least -1.
904  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
905  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
906  KnownBits RHSKnown =
907  computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth);
908 
909  if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
910  // We know that the sign bit is zero.
911  Known.makeNonNegative();
912  }
913  }
914  break;
915  case ICmpInst::ICMP_SLE:
916  // assume(v <=_s c) where c is negative
917  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
918  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
919  KnownBits RHSKnown =
920  computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth);
921 
922  if (RHSKnown.isNegative()) {
923  // We know that the sign bit is one.
924  Known.makeNegative();
925  }
926  }
927  break;
928  case ICmpInst::ICMP_SLT:
929  // assume(v <_s c) where c is non-positive
930  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
931  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
932  KnownBits RHSKnown =
933  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
934 
935  if (RHSKnown.isZero() || RHSKnown.isNegative()) {
936  // We know that the sign bit is one.
937  Known.makeNegative();
938  }
939  }
940  break;
941  case ICmpInst::ICMP_ULE:
942  // assume(v <=_u c)
943  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
944  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
945  KnownBits RHSKnown =
946  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
947 
948  // Whatever high bits in c are zero are known to be zero.
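      // As an illustration, assume(v u<= 0x0F) on an i8 value makes the top
      // four bits of v known zero.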
949  Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
950  }
951  break;
952  case ICmpInst::ICMP_ULT:
953  // assume(v <_u c)
954  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
955  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
956  KnownBits RHSKnown =
957  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
958 
959  // If the RHS is known zero, then this assumption must be wrong (nothing
960  // is unsigned less than zero). Signal a conflict and get out of here.
961  if (RHSKnown.isZero()) {
962  Known.Zero.setAllBits();
963  Known.One.setAllBits();
964  break;
965  }
966 
967  // Whatever high bits in c are zero are known to be zero (if c is a power
968  // of 2, then one more).
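      // As an illustration, assume(v u< 16) on an i8 value: 16 has three
      // known leading zeros and is a power of two, so the top four bits of v
      // are known zero (v can be at most 15).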
969  if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, QueryNoAC))
970  Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
971  else
972  Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
973  }
974  break;
975  }
976  }
977 
978  // If assumptions conflict with each other or previous known bits, then we
979  // have a logical fallacy. It's possible that the assumption is not reachable,
980  // so this isn't a real bug. On the other hand, the program may have undefined
981  // behavior, or we might have a bug in the compiler. We can't assert/crash, so
982  // clear out the known bits, try to warn the user, and hope for the best.
983  if (Known.Zero.intersects(Known.One)) {
984  Known.resetAll();
985 
986  if (Q.ORE)
987  Q.ORE->emit([&]() {
988  auto *CxtI = const_cast<Instruction *>(Q.CxtI);
989  return OptimizationRemarkAnalysis("value-tracking", "BadAssumption",
990  CxtI)
991  << "Detected conflicting code assumptions. Program may "
992  "have undefined behavior, or compiler may have "
993  "internal error.";
994  });
995  }
996 }
997 
998 /// Compute known bits from a shift operator, including those with a
999 /// non-constant shift amount. Known is the output of this function. Known2 is a
1000 /// pre-allocated temporary with the same bit width as Known and on return
1001 /// contains the known bits of the shift value source. KF is an
1002 /// operator-specific function that, given the known bits and a shift amount,
1003 /// computes the implied known bits of the shift operator's result for that
1004 /// shift amount. The results from calling KF are conservatively
1005 /// combined for all permitted shift amounts.
1006 static void computeKnownBitsFromShiftOperator(
1007  const Operator *I, const APInt &DemandedElts, KnownBits &Known,
1008  KnownBits &Known2, unsigned Depth, const Query &Q,
1009  function_ref<KnownBits(const KnownBits &, const KnownBits &)> KF) {
1010  unsigned BitWidth = Known.getBitWidth();
1011  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1012  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1013 
1014  // Note: We cannot use Known.Zero.getLimitedValue() here, because if
1015  // BitWidth > 64 and any upper bits are known, we'll end up returning the
1016  // limit value (which implies all bits are known).
1017  uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
1018  uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();
1019  bool ShiftAmtIsConstant = Known.isConstant();
1020  bool MaxShiftAmtIsOutOfRange = Known.getMaxValue().uge(BitWidth);
1021 
1022  if (ShiftAmtIsConstant) {
1023  Known = KF(Known2, Known);
1024 
1025  // If the known bits conflict, this must be an overflowing left shift, so
1026  // the shift result is poison. We can return anything we want. Choose 0 for
1027  // the best folding opportunity.
1028  if (Known.hasConflict())
1029  Known.setAllZero();
1030 
1031  return;
1032  }
1033 
1034  // If the shift amount could be greater than or equal to the bit-width of the
1035  // LHS, the value could be poison, but bail out because the check below is
1036  // expensive.
1037  // TODO: Should we just carry on?
1038  if (MaxShiftAmtIsOutOfRange) {
1039  Known.resetAll();
1040  return;
1041  }
1042 
1043  // It would be more clearly correct to use the two temporaries for this
1044  // calculation. We reuse the APInts here to prevent unnecessary allocations.
1045  Known.resetAll();
1046 
1047  // If we know the shifter operand is nonzero, we can sometimes infer more
1048  // known bits. However this is expensive to compute, so be lazy about it and
1049  // only compute it when absolutely necessary.
1050  Optional<bool> ShifterOperandIsNonZero;
1051 
1052  // Early exit if we can't constrain any well-defined shift amount.
1053  if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
1054  !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
1055  ShifterOperandIsNonZero =
1056  isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
1057  if (!*ShifterOperandIsNonZero)
1058  return;
1059  }
1060 
1061  Known.Zero.setAllBits();
1062  Known.One.setAllBits();
1063  for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
1064  // Combine the shifted known input bits only for those shift amounts
1065  // compatible with its known constraints.
1066  if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
1067  continue;
1068  if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
1069  continue;
1070  // If we know the shifter is nonzero, we may be able to infer more known
1071  // bits. This check is sunk down as far as possible to avoid the expensive
1072  // call to isKnownNonZero if the cheaper checks above fail.
1073  if (ShiftAmt == 0) {
1074  if (!ShifterOperandIsNonZero)
1075  ShifterOperandIsNonZero =
1076  isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
1077  if (*ShifterOperandIsNonZero)
1078  continue;
1079  }
1080 
1081  Known = KnownBits::commonBits(
1082  Known, KF(Known2, KnownBits::makeConstant(APInt(32, ShiftAmt))));
1083  }
1084 
1085  // If the known bits conflict, the result is poison. Return a 0 and hope the
1086  // caller can further optimize that.
1087  if (Known.hasConflict())
1088  Known.setAllZero();
1089 }
1090 
1091 static void computeKnownBitsFromOperator(const Operator *I,
1092  const APInt &DemandedElts,
1093  KnownBits &Known, unsigned Depth,
1094  const Query &Q) {
1095  unsigned BitWidth = Known.getBitWidth();
1096 
1097  KnownBits Known2(BitWidth);
1098  switch (I->getOpcode()) {
1099  default: break;
1100  case Instruction::Load:
1101  if (MDNode *MD =
1102  Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
1103  computeKnownBitsFromRangeMetadata(*MD, Known);
1104  break;
1105  case Instruction::And: {
1106  // If either the LHS or the RHS are Zero, the result is zero.
1107  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1108  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1109 
1110  Known &= Known2;
1111 
1112  // and(x, add (x, -1)) is a common idiom that always clears the low bit;
1113  // here we handle the more general case of adding any odd number by
1114  // matching the form and(x, add(x, y)) where y is odd.
1115  // TODO: This could be generalized to clearing any bit set in y where the
1116  // following bit is known to be unset in y.
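    // As an illustration, consider x & (x + 5): adding an odd number always
    // flips the low bit of x, so the two AND operands always differ in bit 0
    // and the result's low bit is known zero.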
1117  Value *X = nullptr, *Y = nullptr;
1118  if (!Known.Zero[0] && !Known.One[0] &&
1119  match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y))))) {
1120  Known2.resetAll();
1121  computeKnownBits(Y, DemandedElts, Known2, Depth + 1, Q);
1122  if (Known2.countMinTrailingOnes() > 0)
1123  Known.Zero.setBit(0);
1124  }
1125  break;
1126  }
1127  case Instruction::Or:
1128  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1129  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1130 
1131  Known |= Known2;
1132  break;
1133  case Instruction::Xor:
1134  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1135  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1136 
1137  Known ^= Known2;
1138  break;
1139  case Instruction::Mul: {
1140  bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1141  computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts,
1142  Known, Known2, Depth, Q);
1143  break;
1144  }
1145  case Instruction::UDiv: {
1146  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1147  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1148  Known = KnownBits::udiv(Known, Known2);
1149  break;
1150  }
1151  case Instruction::Select: {
1152  const Value *LHS = nullptr, *RHS = nullptr;
1153  SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
1154  if (SelectPatternResult::isMinOrMax(SPF)) {
1155  computeKnownBits(RHS, Known, Depth + 1, Q);
1156  computeKnownBits(LHS, Known2, Depth + 1, Q);
1157  switch (SPF) {
1158  default:
1159  llvm_unreachable("Unhandled select pattern flavor!");
1160  case SPF_SMAX:
1161  Known = KnownBits::smax(Known, Known2);
1162  break;
1163  case SPF_SMIN:
1164  Known = KnownBits::smin(Known, Known2);
1165  break;
1166  case SPF_UMAX:
1167  Known = KnownBits::umax(Known, Known2);
1168  break;
1169  case SPF_UMIN:
1170  Known = KnownBits::umin(Known, Known2);
1171  break;
1172  }
1173  break;
1174  }
1175 
1176  computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
1177  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1178 
1179  // Only known if known in both the LHS and RHS.
1180  Known = KnownBits::commonBits(Known, Known2);
1181 
1182  if (SPF == SPF_ABS) {
1183  // RHS from matchSelectPattern returns the negation part of abs pattern.
1184  // If the negate has an NSW flag we can assume the sign bit of the result
1185  // will be 0 because that makes abs(INT_MIN) undefined.
1186  if (match(RHS, m_Neg(m_Specific(LHS))) &&
1187  Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(RHS)))
1188  Known.Zero.setSignBit();
1189  }
1190 
1191  break;
1192  }
1193  case Instruction::FPTrunc:
1194  case Instruction::FPExt:
1195  case Instruction::FPToUI:
1196  case Instruction::FPToSI:
1197  case Instruction::SIToFP:
1198  case Instruction::UIToFP:
1199  break; // Can't work with floating point.
1200  case Instruction::PtrToInt:
1201  case Instruction::IntToPtr:
1202  // Fall through and handle them the same as zext/trunc.
1203  LLVM_FALLTHROUGH;
1204  case Instruction::ZExt:
1205  case Instruction::Trunc: {
1206  Type *SrcTy = I->getOperand(0)->getType();
1207 
1208  unsigned SrcBitWidth;
1209  // Note that we handle pointer operands here because of inttoptr/ptrtoint
1210  // which fall through here.
1211  Type *ScalarTy = SrcTy->getScalarType();
1212  SrcBitWidth = ScalarTy->isPointerTy() ?
1213  Q.DL.getPointerTypeSizeInBits(ScalarTy) :
1214  Q.DL.getTypeSizeInBits(ScalarTy);
1215 
1216  assert(SrcBitWidth && "SrcBitWidth can't be zero");
1217  Known = Known.anyextOrTrunc(SrcBitWidth);
1218  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1219  Known = Known.zextOrTrunc(BitWidth);
1220  break;
1221  }
1222  case Instruction::BitCast: {
1223  Type *SrcTy = I->getOperand(0)->getType();
1224  if (SrcTy->isIntOrPtrTy() &&
1225  // TODO: For now, not handling conversions like:
1226  // (bitcast i64 %x to <2 x i32>)
1227  !I->getType()->isVectorTy()) {
1228  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1229  break;
1230  }
1231 
1232  // Handle cast from vector integer type to scalar or vector integer.
1233  auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
1234  if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
1235  !I->getType()->isIntOrIntVectorTy())
1236  break;
1237 
1238  // Look through a cast from narrow vector elements to wider type.
1239  // Examples: v4i32 -> v2i64, v3i8 -> i24
1240  unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
1241  if (BitWidth % SubBitWidth == 0) {
1242  // Known bits are automatically intersected across demanded elements of a
1243  // vector. So for example, if a bit is computed as known zero, it must be
1244  // zero across all demanded elements of the vector.
1245  //
1246  // For this bitcast, each demanded element of the output is sub-divided
1247  // across a set of smaller vector elements in the source vector. To get
1248  // the known bits for an entire element of the output, compute the known
1249  // bits for each sub-element sequentially. This is done by shifting the
1250  // one-set-bit demanded elements parameter across the sub-elements for
1251  // consecutive calls to computeKnownBits. We are using the demanded
1252  // elements parameter as a mask operator.
1253  //
1254  // The known bits of each sub-element are then inserted into place
1255  // (dependent on endian) to form the full result of known bits.
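      // As an illustration, for a bitcast from <4 x i32> to <2 x i64> with
      // output element 0 demanded, the loop below queries source elements 0
      // and 1 (SubScale == 2) and, on a little-endian target, inserts their
      // known bits at offsets 0 and 32 of the 64-bit result.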
1256  unsigned NumElts = DemandedElts.getBitWidth();
1257  unsigned SubScale = BitWidth / SubBitWidth;
1258  APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
1259  for (unsigned i = 0; i != NumElts; ++i) {
1260  if (DemandedElts[i])
1261  SubDemandedElts.setBit(i * SubScale);
1262  }
1263 
1264  KnownBits KnownSrc(SubBitWidth);
1265  for (unsigned i = 0; i != SubScale; ++i) {
1266  computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
1267  Depth + 1, Q);
1268  unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1269  Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
1270  }
1271  }
1272  break;
1273  }
1274  case Instruction::SExt: {
1275  // Compute the bits in the result that are not present in the input.
1276  unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1277 
1278  Known = Known.trunc(SrcBitWidth);
1279  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1280  // If the sign bit of the input is known set or clear, then we know the
1281  // top bits of the result.
1282  Known = Known.sext(BitWidth);
1283  break;
1284  }
1285  case Instruction::Shl: {
1286  bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1287  auto KF = [NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
1288  KnownBits Result = KnownBits::shl(KnownVal, KnownAmt);
1289  // If this shift has "nsw" keyword, then the result is either a poison
1290  // value or has the same sign bit as the first operand.
1291  if (NSW) {
1292  if (KnownVal.Zero.isSignBitSet())
1293  Result.Zero.setSignBit();
1294  if (KnownVal.One.isSignBitSet())
1295  Result.One.setSignBit();
1296  }
1297  return Result;
1298  };
1299  computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1300  KF);
1301  // Trailing zeros of a left-shifted constant never decrease.
1302  const APInt *C;
1303  if (match(I->getOperand(0), m_APInt(C)))
1304  Known.Zero.setLowBits(C->countTrailingZeros());
1305  break;
1306  }
1307  case Instruction::LShr: {
1308  auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
1309  return KnownBits::lshr(KnownVal, KnownAmt);
1310  };
1311  computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1312  KF);
1313  // Leading zeros of a right-shifted constant never decrease.
1314  const APInt *C;
1315  if (match(I->getOperand(0), m_APInt(C)))
1316  Known.Zero.setHighBits(C->countLeadingZeros());
1317  break;
1318  }
1319  case Instruction::AShr: {
1320  auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
1321  return KnownBits::ashr(KnownVal, KnownAmt);
1322  };
1323  computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1324  KF);
1325  break;
1326  }
1327  case Instruction::Sub: {
1328  bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1329  computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
1330  DemandedElts, Known, Known2, Depth, Q);
1331  break;
1332  }
1333  case Instruction::Add: {
1334  bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1335  computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
1336  DemandedElts, Known, Known2, Depth, Q);
1337  break;
1338  }
1339  case Instruction::SRem:
1340  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1341  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1342  Known = KnownBits::srem(Known, Known2);
1343  break;
1344 
1345  case Instruction::URem:
1346  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1347  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1348  Known = KnownBits::urem(Known, Known2);
1349  break;
1350  case Instruction::Alloca:
1351  Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
1352  break;
1353  case Instruction::GetElementPtr: {
1354  // Analyze all of the subscripts of this getelementptr instruction
1355  // to determine if we can prove known low zero bits.
1356  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1357  // Accumulate the constant indices in a separate variable
1358  // to minimize the number of calls to computeForAddSub.
1359  APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
1360 
1361  gep_type_iterator GTI = gep_type_begin(I);
1362  for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1363  // TrailZ can only become smaller, short-circuit if we hit zero.
1364  if (Known.isUnknown())
1365  break;
1366 
1367  Value *Index = I->getOperand(i);
1368 
1369  // Handle case when index is zero.
1370  Constant *CIndex = dyn_cast<Constant>(Index);
1371  if (CIndex && CIndex->isZeroValue())
1372  continue;
1373 
1374  if (StructType *STy = GTI.getStructTypeOrNull()) {
1375  // Handle struct member offset arithmetic.
1376 
1377  assert(CIndex &&
1378  "Access to structure field must be known at compile time");
1379 
1380  if (CIndex->getType()->isVectorTy())
1381  Index = CIndex->getSplatValue();
1382 
1383  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1384  const StructLayout *SL = Q.DL.getStructLayout(STy);
1385  uint64_t Offset = SL->getElementOffset(Idx);
1386  AccConstIndices += Offset;
1387  continue;
1388  }
1389 
1390  // Handle array index arithmetic.
1391  Type *IndexedTy = GTI.getIndexedType();
1392  if (!IndexedTy->isSized()) {
1393  Known.resetAll();
1394  break;
1395  }
1396 
1397  unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
1398  KnownBits IndexBits(IndexBitWidth);
1399  computeKnownBits(Index, IndexBits, Depth + 1, Q);
1400  TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
1401  uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinSize();
1402  KnownBits ScalingFactor(IndexBitWidth);
1403  // Multiply by current sizeof type.
1404  // &A[i] == A + i * sizeof(*A[i]).
1405  if (IndexTypeSize.isScalable()) {
1406  // For scalable types the only thing we know about sizeof is
1407  // that this is a multiple of the minimum size.
1408  ScalingFactor.Zero.setLowBits(countTrailingZeros(TypeSizeInBytes));
1409  } else if (IndexBits.isConstant()) {
1410  APInt IndexConst = IndexBits.getConstant();
1411  APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
1412  IndexConst *= ScalingFactor;
1413  AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
1414  continue;
1415  } else {
1416  ScalingFactor =
1417  KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
1418  }
1419  IndexBits = KnownBits::mul(IndexBits, ScalingFactor);
1420 
1421  // If the offsets have a different width from the pointer, according
1422  // to the language reference we need to sign-extend or truncate them
1423  // to the width of the pointer.
1424  IndexBits = IndexBits.sextOrTrunc(BitWidth);
1425 
1426  // Note that inbounds does *not* guarantee nsw for the addition, as only
1427  // the offset is signed, while the base address is unsigned.
1428  Known = KnownBits::computeForAddSub(
1429  /*Add=*/true, /*NSW=*/false, Known, IndexBits);
1430  }
1431  if (!Known.isUnknown() && !AccConstIndices.isZero()) {
1432  KnownBits Index = KnownBits::makeConstant(AccConstIndices);
1433  Known = KnownBits::computeForAddSub(
1434  /*Add=*/true, /*NSW=*/false, Known, Index);
1435  }
1436  break;
1437  }
1438  case Instruction::PHI: {
1439  const PHINode *P = cast<PHINode>(I);
1440  BinaryOperator *BO = nullptr;
1441  Value *R = nullptr, *L = nullptr;
1442  if (matchSimpleRecurrence(P, BO, R, L)) {
1443  // Handle the case of a simple two-predecessor recurrence PHI.
1444  // There's a lot more that could theoretically be done here, but
1445  // this is sufficient to catch some interesting cases.
1446  unsigned Opcode = BO->getOpcode();
1447 
1448  // If this is a shift recurrence, we know the bits being shifted in.
1449  // We can combine that with information about the start value of the
1450  // recurrence to conclude facts about the result.
1451  if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr ||
1452  Opcode == Instruction::Shl) &&
1453  BO->getOperand(0) == I) {
1454 
1455  // We have matched a recurrence of the form:
1456  // %iv = [R, %entry], [%iv.next, %backedge]
1457  // %iv.next = shift_op %iv, L
1458 
1459  // Recurse with the phi context to avoid concern about whether facts
1460  // inferred hold at original context instruction. TODO: It may be
1461  // correct to use the original context. IF warranted, explore and
1462  // add sufficient tests to cover.
1463  Query RecQ = Q;
1464  RecQ.CxtI = P;
1465  computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1466  switch (Opcode) {
1467  case Instruction::Shl:
1468  // A shl recurrence will only increase the trailing zeros
1469  Known.Zero.setLowBits(Known2.countMinTrailingZeros());
1470  break;
1471  case Instruction::LShr:
1472  // A lshr recurrence will preserve the leading zeros of the
1473  // start value
1474  Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1475  break;
1476  case Instruction::AShr:
1477  // An ashr recurrence will extend the initial sign bit
1478  Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1479  Known.One.setHighBits(Known2.countMinLeadingOnes());
1480  break;
1481  };
1482  }
1483 
1484  // Check for operations that have the property that if
1485  // both their operands have low zero bits, the result
1486  // will have low zero bits.
1487  if (Opcode == Instruction::Add ||
1488  Opcode == Instruction::Sub ||
1489  Opcode == Instruction::And ||
1490  Opcode == Instruction::Or ||
1491  Opcode == Instruction::Mul) {
1492  // Change the context instruction to the "edge" that flows into the
1493  // phi. This is important because that is where the value is actually
1494  // "evaluated" even though it is used later somewhere else. (see also
1495  // D69571).
1496  Query RecQ = Q;
1497 
1498  unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
1499  Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
1500  Instruction *LInst = P->getIncomingBlock(1-OpNum)->getTerminator();
1501 
1502  // Ok, we have a PHI of the form L op= R. Check for low
1503  // zero bits.
1504  RecQ.CxtI = RInst;
1505  computeKnownBits(R, Known2, Depth + 1, RecQ);
1506 
1507  // We need to take the minimum number of known bits
1508  KnownBits Known3(BitWidth);
1509  RecQ.CxtI = LInst;
1510  computeKnownBits(L, Known3, Depth + 1, RecQ);
1511 
1512  Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
1513  Known3.countMinTrailingZeros()));
1514 
1515  auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
1516  if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
1517  // If initial value of recurrence is nonnegative, and we are adding
1518  // a nonnegative number with nsw, the result can only be nonnegative
1519  // or poison value regardless of the number of times we execute the
1520  // add in phi recurrence. If initial value is negative and we are
1521  // adding a negative number with nsw, the result can only be
1522  // negative or poison value. Similar arguments apply to sub and mul.
1523  //
1524  // (add non-negative, non-negative) --> non-negative
1525  // (add negative, negative) --> negative
1526  if (Opcode == Instruction::Add) {
1527  if (Known2.isNonNegative() && Known3.isNonNegative())
1528  Known.makeNonNegative();
1529  else if (Known2.isNegative() && Known3.isNegative())
1530  Known.makeNegative();
1531  }
1532 
1533  // (sub nsw non-negative, negative) --> non-negative
1534  // (sub nsw negative, non-negative) --> negative
1535  else if (Opcode == Instruction::Sub && BO->getOperand(0) == I) {
1536  if (Known2.isNonNegative() && Known3.isNegative())
1537  Known.makeNonNegative();
1538  else if (Known2.isNegative() && Known3.isNonNegative())
1539  Known.makeNegative();
1540  }
1541 
1542  // (mul nsw non-negative, non-negative) --> non-negative
1543  else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
1544  Known3.isNonNegative())
1545  Known.makeNonNegative();
1546  }
1547 
1548  break;
1549  }
1550  }
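  // Example (illustrative, hypothetical values): for an add recurrence
  // whose start value and step are both known multiples of 16 (low 4 bits
  // zero), every value of the PHI is also a multiple of 16, so the code
  // above sets the low 4 bits of Known.Zero. The same holds for sub, and,
  // or and mul, none of which can produce a one below the common known
  // trailing zeros of their operands.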
1551 
1552  // Unreachable blocks may have zero-operand PHI nodes.
1553  if (P->getNumIncomingValues() == 0)
1554  break;
1555 
1556  // Otherwise, keep only the bits known in every incoming value, taking
1557  // conservative care to avoid excessive recursion.
1558  if (Depth < MaxAnalysisRecursionDepth - 1 && !Known.Zero && !Known.One) {
1559  // Skip if every incoming value references the PHI itself.
1560  if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
1561  break;
1562 
1563  Known.Zero.setAllBits();
1564  Known.One.setAllBits();
1565  for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
1566  Value *IncValue = P->getIncomingValue(u);
1567  // Skip direct self references.
1568  if (IncValue == P) continue;
1569 
1570  // Change the context instruction to the "edge" that flows into the
1571  // phi. This is important because that is where the value is actually
1572  // "evaluated" even though it is used later somewhere else. (see also
1573  // D69571).
1574  Query RecQ = Q;
1575  RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
1576 
1577  Known2 = KnownBits(BitWidth);
1578  // Recurse, but cap the recursion to one level, because we don't
1579  // want to waste time spinning around in loops.
1580  computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ);
1581  Known = KnownBits::commonBits(Known, Known2);
1582  // If all bits have been ruled out, there's no need to check
1583  // more operands.
1584  if (Known.isUnknown())
1585  break;
1586  }
1587  }
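  // Example (illustrative): if every incoming value of an i8 PHI is known
  // to be even, the intersection taken by commonBits above keeps the low
  // bit in Known.Zero; a bit known in only some predecessors is dropped,
  // which is why Known starts fully set and only narrows.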
1588  break;
1589  }
1590  case Instruction::Call:
1591  case Instruction::Invoke:
1592  // If range metadata is attached to this call, set known bits from that,
1593  // and then intersect with known bits based on other properties of the
1594  // function.
1595  if (MDNode *MD =
1596  Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
1597  computeKnownBitsFromRangeMetadata(*MD, Known);
1598  if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) {
1599  computeKnownBits(RV, Known2, Depth + 1, Q);
1600  Known.Zero |= Known2.Zero;
1601  Known.One |= Known2.One;
1602  }
1603  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1604  switch (II->getIntrinsicID()) {
1605  default: break;
1606  case Intrinsic::abs: {
1607  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1608  bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
1609  Known = Known2.abs(IntMinIsPoison);
1610  break;
1611  }
1612  case Intrinsic::bitreverse:
1613  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1614  Known.Zero |= Known2.Zero.reverseBits();
1615  Known.One |= Known2.One.reverseBits();
1616  break;
1617  case Intrinsic::bswap:
1618  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1619  Known.Zero |= Known2.Zero.byteSwap();
1620  Known.One |= Known2.One.byteSwap();
1621  break;
1622  case Intrinsic::ctlz: {
1623  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1624  // If we have a known 1, its position is our upper bound.
1625  unsigned PossibleLZ = Known2.countMaxLeadingZeros();
1626  // If this call is poison for 0 input, the result will be less than 2^n.
1627  if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1628  PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1629  unsigned LowBits = Log2_32(PossibleLZ)+1;
1630  Known.Zero.setBitsFrom(LowBits);
1631  break;
1632  }
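  // Worked example (hypothetical operand): for an i32 ctlz whose operand
  // has bit 24 known to be one, at most 7 leading zeros are possible, so
  // LowBits = Log2_32(7) + 1 = 3 and bits 3 and above of the result are
  // known zero (the result is at most 7).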
1633  case Intrinsic::cttz: {
1634  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1635  // If we have a known 1, its position is our upper bound.
1636  unsigned PossibleTZ = Known2.countMaxTrailingZeros();
1637  // If this call is poison for 0 input, the result will be less than 2^n.
1638  if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1639  PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1640  unsigned LowBits = Log2_32(PossibleTZ)+1;
1641  Known.Zero.setBitsFrom(LowBits);
1642  break;
1643  }
1644  case Intrinsic::ctpop: {
1645  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1646  // We can bound the space the count needs. Also, bits known to be zero
1647  // can't contribute to the population.
1648  unsigned BitsPossiblySet = Known2.countMaxPopulation();
1649  unsigned LowBits = Log2_32(BitsPossiblySet)+1;
1650  Known.Zero.setBitsFrom(LowBits);
1651  // TODO: we could bound KnownOne using the lower bound on the number
1652  // of bits which might be set provided by popcnt KnownOne2.
1653  break;
1654  }
1655  case Intrinsic::fshr:
1656  case Intrinsic::fshl: {
1657  const APInt *SA;
1658  if (!match(I->getOperand(2), m_APInt(SA)))
1659  break;
1660 
1661  // Normalize to funnel shift left.
1662  uint64_t ShiftAmt = SA->urem(BitWidth);
1663  if (II->getIntrinsicID() == Intrinsic::fshr)
1664  ShiftAmt = BitWidth - ShiftAmt;
1665 
1666  KnownBits Known3(BitWidth);
1667  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1668  computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q);
1669 
1670  Known.Zero =
1671  Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
1672  Known.One =
1673  Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
1674  break;
1675  }
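  // Example (illustrative constants): fshl(%x, %y, 8) on i32 computes
  // (%x << 8) | (%y lshr 24), so the known bits of %x shifted left by 8
  // are combined with the known bits of %y shifted right by 24, exactly
  // as the two terms above do. An fshr by 8 is first normalized to an
  // fshl by 32 - 8 = 24.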
1676  case Intrinsic::uadd_sat:
1677  case Intrinsic::usub_sat: {
1678  bool IsAdd = II->getIntrinsicID() == Intrinsic::uadd_sat;
1679  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1680  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1681 
1682  // Add: Leading ones of either operand are preserved.
1683  // Sub: Leading zeros of LHS and leading ones of RHS are preserved
1684  // as leading zeros in the result.
1685  unsigned LeadingKnown;
1686  if (IsAdd)
1687  LeadingKnown = std::max(Known.countMinLeadingOnes(),
1688  Known2.countMinLeadingOnes());
1689  else
1690  LeadingKnown = std::max(Known.countMinLeadingZeros(),
1691  Known2.countMinLeadingOnes());
1692 
1693  Known = KnownBits::computeForAddSub(
1694  IsAdd, /* NSW */ false, Known, Known2);
1695 
1696  // We select between the operation result and all-ones/zero
1697  // respectively, so we can preserve known ones/zeros.
1698  if (IsAdd) {
1699  Known.One.setHighBits(LeadingKnown);
1700  Known.Zero.clearAllBits();
1701  } else {
1702  Known.Zero.setHighBits(LeadingKnown);
1703  Known.One.clearAllBits();
1704  }
1705  break;
1706  }
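  // Example (illustrative): uadd.sat's result is always >= either operand
  // (it saturates to all-ones on overflow), so if one operand has its top
  // 4 bits known one, the saturated sum does too; that is what
  // setHighBits(LeadingKnown) records above.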
1707  case Intrinsic::umin:
1708  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1709  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1710  Known = KnownBits::umin(Known, Known2);
1711  break;
1712  case Intrinsic::umax:
1713  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1714  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1715  Known = KnownBits::umax(Known, Known2);
1716  break;
1717  case Intrinsic::smin:
1718  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1719  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1720  Known = KnownBits::smin(Known, Known2);
1721  break;
1722  case Intrinsic::smax:
1723  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1724  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1725  Known = KnownBits::smax(Known, Known2);
1726  break;
1727  case Intrinsic::x86_sse42_crc32_64_64:
1728  Known.Zero.setBitsFrom(32);
1729  break;
1730  case Intrinsic::riscv_vsetvli:
1731  case Intrinsic::riscv_vsetvlimax:
1732  // Assume that VL output is positive and would fit in an int32_t.
1733  // TODO: VLEN might be capped at 16 bits in a future V spec update.
1734  if (BitWidth >= 32)
1735  Known.Zero.setBitsFrom(31);
1736  break;
1737  case Intrinsic::vscale: {
1738  if (!II->getParent() || !II->getFunction() ||
1739  !II->getFunction()->hasFnAttribute(Attribute::VScaleRange))
1740  break;
1741 
1742  auto Attr = II->getFunction()->getFnAttribute(Attribute::VScaleRange);
1743  Optional<unsigned> VScaleMax = Attr.getVScaleRangeMax();
1744 
1745  if (!VScaleMax)
1746  break;
1747 
1748  unsigned VScaleMin = Attr.getVScaleRangeMin();
1749 
1750  // If vscale min = max then we know the exact value at compile time
1751  // and hence we know the exact bits.
1752  if (VScaleMin == VScaleMax) {
1753  Known.One = VScaleMin;
1754  Known.Zero = VScaleMin;
1755  Known.Zero.flipAllBits();
1756  break;
1757  }
1758 
1759  unsigned FirstZeroHighBit = 32 - countLeadingZeros(*VScaleMax);
1760  if (FirstZeroHighBit < BitWidth)
1761  Known.Zero.setBitsFrom(FirstZeroHighBit);
1762 
1763  break;
1764  }
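  // Example (hypothetical attributes): with vscale_range(2,2) the value is
  // exactly 2, so Known becomes the constant 2; with an upper bound such
  // as vscale_range(1,16), 32 - countLeadingZeros(16) = 5, so bits 5 and
  // above are known zero (vscale < 32).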
1765  }
1766  }
1767  break;
1768  case Instruction::ShuffleVector: {
1769  auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
1770  // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
1771  if (!Shuf) {
1772  Known.resetAll();
1773  return;
1774  }
1775  // For undef elements, we don't know anything about the common state of
1776  // the shuffle result.
1777  APInt DemandedLHS, DemandedRHS;
1778  if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
1779  Known.resetAll();
1780  return;
1781  }
1782  Known.One.setAllBits();
1783  Known.Zero.setAllBits();
1784  if (!!DemandedLHS) {
1785  const Value *LHS = Shuf->getOperand(0);
1786  computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
1787  // If we don't know any bits, early out.
1788  if (Known.isUnknown())
1789  break;
1790  }
1791  if (!!DemandedRHS) {
1792  const Value *RHS = Shuf->getOperand(1);
1793  computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
1794  Known = KnownBits::commonBits(Known, Known2);
1795  }
1796  break;
1797  }
1798  case Instruction::InsertElement: {
1799  const Value *Vec = I->getOperand(0);
1800  const Value *Elt = I->getOperand(1);
1801  auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
1802  // Early out if the index is non-constant or out-of-range.
1803  unsigned NumElts = DemandedElts.getBitWidth();
1804  if (!CIdx || CIdx->getValue().uge(NumElts)) {
1805  Known.resetAll();
1806  return;
1807  }
1808  Known.One.setAllBits();
1809  Known.Zero.setAllBits();
1810  unsigned EltIdx = CIdx->getZExtValue();
1811  // Do we demand the inserted element?
1812  if (DemandedElts[EltIdx]) {
1813  computeKnownBits(Elt, Known, Depth + 1, Q);
1814  // If we don't know any bits, early out.
1815  if (Known.isUnknown())
1816  break;
1817  }
1818  // We don't need the base vector element that has been inserted.
1819  APInt DemandedVecElts = DemandedElts;
1820  DemandedVecElts.clearBit(EltIdx);
1821  if (!!DemandedVecElts) {
1822  computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
1823  Known = KnownBits::commonBits(Known, Known2);
1824  }
1825  break;
1826  }
1827  case Instruction::ExtractElement: {
1828  // Look through extract element. If the index is non-constant or
1829  // out-of-range demand all elements, otherwise just the extracted element.
1830  const Value *Vec = I->getOperand(0);
1831  const Value *Idx = I->getOperand(1);
1832  auto *CIdx = dyn_cast<ConstantInt>(Idx);
1833  if (isa<ScalableVectorType>(Vec->getType())) {
1834  // FIXME: there's probably *something* we can do with scalable vectors
1835  Known.resetAll();
1836  break;
1837  }
1838  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1839  APInt DemandedVecElts = APInt::getAllOnes(NumElts);
1840  if (CIdx && CIdx->getValue().ult(NumElts))
1841  DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
1842  computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
1843  break;
1844  }
1845  case Instruction::ExtractValue:
1846  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
1847  const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
1848  if (EVI->getNumIndices() != 1) break;
1849  if (EVI->getIndices()[0] == 0) {
1850  switch (II->getIntrinsicID()) {
1851  default: break;
1852  case Intrinsic::uadd_with_overflow:
1853  case Intrinsic::sadd_with_overflow:
1854  computeKnownBitsAddSub(true, II->getArgOperand(0),
1855  II->getArgOperand(1), false, DemandedElts,
1856  Known, Known2, Depth, Q);
1857  break;
1858  case Intrinsic::usub_with_overflow:
1859  case Intrinsic::ssub_with_overflow:
1860  computeKnownBitsAddSub(false, II->getArgOperand(0),
1861  II->getArgOperand(1), false, DemandedElts,
1862  Known, Known2, Depth, Q);
1863  break;
1864  case Intrinsic::umul_with_overflow:
1865  case Intrinsic::smul_with_overflow:
1866  computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
1867  DemandedElts, Known, Known2, Depth, Q);
1868  break;
1869  }
1870  }
1871  }
1872  break;
1873  case Instruction::Freeze:
1874  if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
1875  Depth + 1))
1876  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1877  break;
1878  }
1879 }
1880 
1881 /// Determine which bits of V are known to be either zero or one and return
1882 /// them.
1883 KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
1884  unsigned Depth, const Query &Q) {
1885  KnownBits Known(getBitWidth(V->getType(), Q.DL));
1886  computeKnownBits(V, DemandedElts, Known, Depth, Q);
1887  return Known;
1888 }
1889 
1890 /// Determine which bits of V are known to be either zero or one and return
1891 /// them.
1892 KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) {
1893  KnownBits Known(getBitWidth(V->getType(), Q.DL));
1894  computeKnownBits(V, Known, Depth, Q);
1895  return Known;
1896 }
1897 
1898 /// Determine which bits of V are known to be either zero or one and return
1899 /// them in the Known bit set.
1900 ///
1901 /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
1902 /// we cannot optimize based on the assumption that it is zero without changing
1903  /// it to be an explicit zero. If we don't change it to zero, other code could be
1904 /// optimized based on the contradictory assumption that it is non-zero.
1905 /// Because instcombine aggressively folds operations with undef args anyway,
1906 /// this won't lose us code quality.
1907 ///
1908 /// This function is defined on values with integer type, values with pointer
1909 /// type, and vectors of integers. In the case
1910  /// where V is a vector, the known zero and known one values are the
1911 /// same width as the vector element, and the bit is set only if it is true
1912 /// for all of the demanded elements in the vector specified by DemandedElts.
1913 void computeKnownBits(const Value *V, const APInt &DemandedElts,
1914  KnownBits &Known, unsigned Depth, const Query &Q) {
1915  if (!DemandedElts || isa<ScalableVectorType>(V->getType())) {
1916  // No demanded elts or V is a scalable vector, better to assume we don't
1917  // know anything.
1918  Known.resetAll();
1919  return;
1920  }
1921 
1922  assert(V && "No Value?");
1923  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
1924 
1925 #ifndef NDEBUG
1926  Type *Ty = V->getType();
1927  unsigned BitWidth = Known.getBitWidth();
1928 
1930  "Not integer or pointer type!");
1931 
1932  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
1933  assert(
1934  FVTy->getNumElements() == DemandedElts.getBitWidth() &&
1935  "DemandedElt width should equal the fixed vector number of elements");
1936  } else {
1937  assert(DemandedElts == APInt(1, 1) &&
1938  "DemandedElt width should be 1 for scalars");
1939  }
1940 
1941  Type *ScalarTy = Ty->getScalarType();
1942  if (ScalarTy->isPointerTy()) {
1943  assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
1944  "V and Known should have same BitWidth");
1945  } else {
1946  assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
1947  "V and Known should have same BitWidth");
1948  }
1949 #endif
1950 
1951  const APInt *C;
1952  if (match(V, m_APInt(C))) {
1953  // We know all of the bits for a scalar constant or a splat vector constant!
1954  Known = KnownBits::makeConstant(*C);
1955  return;
1956  }
1957  // Null and aggregate-zero are all-zeros.
1958  if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
1959  Known.setAllZero();
1960  return;
1961  }
1962  // Handle a constant vector by taking the intersection of the known bits of
1963  // each element.
1964  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
1965  // We know that CDV must be a vector of integers. Take the intersection of
1966  // each element.
1967  Known.Zero.setAllBits(); Known.One.setAllBits();
1968  for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
1969  if (!DemandedElts[i])
1970  continue;
1971  APInt Elt = CDV->getElementAsAPInt(i);
1972  Known.Zero &= ~Elt;
1973  Known.One &= Elt;
1974  }
1975  return;
1976  }
1977 
1978  if (const auto *CV = dyn_cast<ConstantVector>(V)) {
1979  // We know that CV must be a vector of integers. Take the intersection of
1980  // each element.
1981  Known.Zero.setAllBits(); Known.One.setAllBits();
1982  for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1983  if (!DemandedElts[i])
1984  continue;
1985  Constant *Element = CV->getAggregateElement(i);
1986  auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
1987  if (!ElementCI) {
1988  Known.resetAll();
1989  return;
1990  }
1991  const APInt &Elt = ElementCI->getValue();
1992  Known.Zero &= ~Elt;
1993  Known.One &= Elt;
1994  }
1995  return;
1996  }
1997 
1998  // Start out not knowing anything.
1999  Known.resetAll();
2000 
2001  // We can't imply anything about undefs.
2002  if (isa<UndefValue>(V))
2003  return;
2004 
2005  // There's no point in looking through other users of ConstantData for
2006  // assumptions. Confirm that we've handled them all.
2007  assert(!isa<ConstantData>(V) && "Unhandled constant data!");
2008 
2009  // All recursive calls that increase depth must come after this.
2010  if (Depth == MaxAnalysisRecursionDepth)
2011  return;
2012 
2013  // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
2014  // the bits of its aliasee.
2015  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2016  if (!GA->isInterposable())
2017  computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
2018  return;
2019  }
2020 
2021  if (const Operator *I = dyn_cast<Operator>(V))
2022  computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
2023 
2024  // Aligned pointers have trailing zeros - refine Known.Zero set
2025  if (isa<PointerType>(V->getType())) {
2026  Align Alignment = V->getPointerAlignment(Q.DL);
2027  Known.Zero.setLowBits(Log2(Alignment));
2028  }
2029 
2030  // computeKnownBitsFromAssume strictly refines Known.
2031  // Therefore, we run them after computeKnownBitsFromOperator.
2032 
2033  // Check whether a nearby assume intrinsic can determine some known bits.
2034  computeKnownBitsFromAssume(V, Known, Depth, Q);
2035 
2036  assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
2037 }
2038 
2039 /// Try to detect a recurrence that the value of the induction variable is
2040 /// always a power of two (or zero).
2041 static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
2042  unsigned Depth, Query &Q) {
2043  BinaryOperator *BO = nullptr;
2044  Value *Start = nullptr, *Step = nullptr;
2045  if (!matchSimpleRecurrence(PN, BO, Start, Step))
2046  return false;
2047 
2048  // Initial value must be a power of two.
2049  for (const Use &U : PN->operands()) {
2050  if (U.get() == Start) {
2051  // Initial value comes from a different BB, need to adjust context
2052  // instruction for analysis.
2053  Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
2054  if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
2055  return false;
2056  }
2057  }
2058 
2059  // Except for Mul, the induction variable must be on the left side of the
2060  // increment expression, otherwise its value can be arbitrary.
2061  if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
2062  return false;
2063 
2064  Q.CxtI = BO->getParent()->getTerminator();
2065  switch (BO->getOpcode()) {
2066  case Instruction::Mul:
2067  // Power of two is closed under multiplication.
2068  return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
2069  Q.IIQ.hasNoSignedWrap(BO)) &&
2070  isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
2071  case Instruction::SDiv:
2072  // Start value must not be signmask for signed division, so simply being a
2073  // power of two is not sufficient, and it has to be a constant.
2074  if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2075  return false;
2076  LLVM_FALLTHROUGH;
2077  case Instruction::UDiv:
2078  // Divisor must be a power of two.
2079  // If OrZero is false, cannot guarantee induction variable is non-zero after
2080  // division, same for Shr, unless it is exact division.
2081  return (OrZero || Q.IIQ.isExact(BO)) &&
2082  isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
2083  case Instruction::Shl:
2084  return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
2085  case Instruction::AShr:
2086  if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2087  return false;
2088  LLVM_FALLTHROUGH;
2089  case Instruction::LShr:
2090  return OrZero || Q.IIQ.isExact(BO);
2091  default:
2092  return false;
2093  }
2094 }
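// Example (illustrative IR, hypothetical names):
//   %p = phi i32 [ 8, %entry ], [ %p.next, %loop ]
//   %p.next = shl nuw i32 %p, 1
// The start value 8 is a power of two and shl nuw cannot shift the single
// set bit out, so the recurrence is recognized as always being a power of
// two.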
2095 
2096 /// Return true if the given value is known to have exactly one
2097 /// bit set when defined. For vectors return true if every element is known to
2098 /// be a power of two when defined. Supports values with integer or pointer
2099 /// types and vectors of integers.
2100 bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
2101  const Query &Q) {
2102  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2103 
2104  // Attempt to match against constants.
2105  if (OrZero && match(V, m_Power2OrZero()))
2106  return true;
2107  if (match(V, m_Power2()))
2108  return true;
2109 
2110  // 1 << X is clearly a power of two if the one is not shifted off the end. If
2111  // it is shifted off the end then the result is undefined.
2112  if (match(V, m_Shl(m_One(), m_Value())))
2113  return true;
2114 
2115  // (signmask) >>l X is clearly a power of two if the one is not shifted off
2116  // the bottom. If it is shifted off the bottom then the result is undefined.
2117  if (match(V, m_LShr(m_SignMask(), m_Value())))
2118  return true;
2119 
2120  // The remaining tests are all recursive, so bail out if we hit the limit.
2121  if (Depth++ == MaxAnalysisRecursionDepth)
2122  return false;
2123 
2124  Value *X = nullptr, *Y = nullptr;
2125  // A shift left or a logical shift right of a power of two is a power of two
2126  // or zero.
2127  if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
2128  match(V, m_LShr(m_Value(X), m_Value()))))
2129  return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q);
2130 
2131  if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V))
2132  return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
2133 
2134  if (const SelectInst *SI = dyn_cast<SelectInst>(V))
2135  return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
2136  isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
2137 
2138  // Peek through min/max.
2139  if (match(V, m_MaxOrMin(m_Value(X), m_Value(Y)))) {
2140  return isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q) &&
2141  isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q);
2142  }
2143 
2144  if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
2145  // A power of two and'd with anything is a power of two or zero.
2146  if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) ||
2147  isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q))
2148  return true;
2149  // X & (-X) is always a power of two or zero.
2150  if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
2151  return true;
2152  return false;
2153  }
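  // Worked example (hypothetical value): X = 0b01101000 gives
  // -X = 0b10011000 and X & -X = 0b00001000, the lowest set bit of X,
  // which is a power of two; when X == 0 the result is 0, which is why
  // this case requires OrZero.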
2154 
2155  // Adding a power-of-two or zero to the same power-of-two or zero yields
2156  // either the original power-of-two, a larger power-of-two or zero.
2157  if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
2158  const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
2159  if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
2160  Q.IIQ.hasNoSignedWrap(VOBO)) {
2161  if (match(X, m_And(m_Specific(Y), m_Value())) ||
2162  match(X, m_And(m_Value(), m_Specific(Y))))
2163  if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
2164  return true;
2165  if (match(Y, m_And(m_Specific(X), m_Value())) ||
2166  match(Y, m_And(m_Value(), m_Specific(X))))
2167  if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
2168  return true;
2169 
2170  unsigned BitWidth = V->getType()->getScalarSizeInBits();
2171  KnownBits LHSBits(BitWidth);
2172  computeKnownBits(X, LHSBits, Depth, Q);
2173 
2174  KnownBits RHSBits(BitWidth);
2175  computeKnownBits(Y, RHSBits, Depth, Q);
2176  // If i8 V is a power of two or zero:
2177  // ZeroBits: 1 1 1 0 1 1 1 1
2178  // ~ZeroBits: 0 0 0 1 0 0 0 0
2179  if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
2180  // If OrZero isn't set, we cannot give back a zero result.
2181  // Make sure either the LHS or RHS has a bit set.
2182  if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
2183  return true;
2184  }
2185  }
2186 
2187  // A PHI node is power of two if all incoming values are power of two, or if
2188  // it is an induction variable where in each step its value is a power of two.
2189  if (const PHINode *PN = dyn_cast<PHINode>(V)) {
2190  Query RecQ = Q;
2191 
2192  // Check if it is an induction variable and always power of two.
2193  if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
2194  return true;
2195 
2196  // Recursively check all incoming values. Limit recursion to 2 levels, so
2197  // that search complexity is limited to number of operands^2.
2198  unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2199  return llvm::all_of(PN->operands(), [&](const Use &U) {
2200  // Value is power of 2 if it is coming from PHI node itself by induction.
2201  if (U.get() == PN)
2202  return true;
2203 
2204  // Change the context instruction to the incoming block where it is
2205  // evaluated.
2206  RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2207  return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
2208  });
2209  }
2210 
2211  // An exact divide or right shift can only shift off zero bits, so the result
2212  // is a power of two only if the first operand is a power of two and not
2213  // copying a sign bit (sdiv int_min, 2).
2214  if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
2215  match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
2216  return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
2217  Depth, Q);
2218  }
2219 
2220  return false;
2221 }
2222 
2223 /// Test whether a GEP's result is known to be non-null.
2224 ///
2225 /// Uses properties inherent in a GEP to try to determine whether it is known
2226 /// to be non-null.
2227 ///
2228 /// Currently this routine does not support vector GEPs.
2229 static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
2230  const Query &Q) {
2231  const Function *F = nullptr;
2232  if (const Instruction *I = dyn_cast<Instruction>(GEP))
2233  F = I->getFunction();
2234 
2235  if (!GEP->isInBounds() ||
2236  NullPointerIsDefined(F, GEP->getPointerAddressSpace()))
2237  return false;
2238 
2239  // FIXME: Support vector-GEPs.
2240  assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2241 
2242  // If the base pointer is non-null, we cannot walk to a null address with an
2243  // inbounds GEP in address space zero.
2244  if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q))
2245  return true;
2246 
2247  // Walk the GEP operands and see if any operand introduces a non-zero offset.
2248  // If so, then the GEP cannot produce a null pointer, as doing so would
2249  // inherently violate the inbounds contract within address space zero.
2250  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2251  GTI != GTE; ++GTI) {
2252  // Struct types are easy -- they must always be indexed by a constant.
2253  if (StructType *STy = GTI.getStructTypeOrNull()) {
2254  ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
2255  unsigned ElementIdx = OpC->getZExtValue();
2256  const StructLayout *SL = Q.DL.getStructLayout(STy);
2257  uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
2258  if (ElementOffset > 0)
2259  return true;
2260  continue;
2261  }
2262 
2263  // If we have a zero-sized type, the index doesn't matter. Keep looping.
2264  if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).getKnownMinSize() == 0)
2265  continue;
2266 
2267  // Fast path the constant operand case both for efficiency and so we don't
2268  // increment Depth when just zipping down an all-constant GEP.
2269  if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
2270  if (!OpC->isZero())
2271  return true;
2272  continue;
2273  }
2274 
2275  // We post-increment Depth here because while isKnownNonZero increments it
2276  // as well, when we pop back up that increment won't persist. We don't want
2277  // to recurse 10k times just because we have 10k GEP operands. We don't
2278  // bail completely out because we want to handle constant GEPs regardless
2279  // of depth.
2280  if (Depth++ >= MaxAnalysisRecursionDepth)
2281  continue;
2282 
2283  if (isKnownNonZero(GTI.getOperand(), Depth, Q))
2284  return true;
2285  }
2286 
2287  return false;
2288 }
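// Example (illustrative IR, hypothetical names): in address space 0,
//   %f = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i64 0, i32 1
// indexes a struct field at offset 4, so the loop above proves the result
// non-null even when nothing is known about %p itself.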
2289 
2290 static bool isKnownNonNullFromDominatingCondition(const Value *V,
2291  const Instruction *CtxI,
2292  const DominatorTree *DT) {
2293  if (isa<Constant>(V))
2294  return false;
2295 
2296  if (!CtxI || !DT)
2297  return false;
2298 
2299  unsigned NumUsesExplored = 0;
2300  for (auto *U : V->users()) {
2301  // Avoid massive lists
2302  if (NumUsesExplored >= DomConditionsMaxUses)
2303  break;
2304  NumUsesExplored++;
2305 
2306  // If the value is used as an argument to a call or invoke, then argument
2307  // attributes may provide an answer about null-ness.
2308  if (const auto *CB = dyn_cast<CallBase>(U))
2309  if (auto *CalledFunc = CB->getCalledFunction())
2310  for (const Argument &Arg : CalledFunc->args())
2311  if (CB->getArgOperand(Arg.getArgNo()) == V &&
2312  Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) &&
2313  DT->dominates(CB, CtxI))
2314  return true;
2315 
2316  // If the value is used as a load/store, then the pointer must be non null.
2317  if (V == getLoadStorePointerOperand(U)) {
2318  const Instruction *I = cast<Instruction>(U);
2319  if (!NullPointerIsDefined(I->getFunction(),
2320  V->getType()->getPointerAddressSpace()) &&
2321  DT->dominates(I, CtxI))
2322  return true;
2323  }
2324 
2325  // Consider only compare instructions uniquely controlling a branch
2326  Value *RHS;
2327  CmpInst::Predicate Pred;
2328  if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
2329  continue;
2330 
2331  bool NonNullIfTrue;
2332  if (cmpExcludesZero(Pred, RHS))
2333  NonNullIfTrue = true;
2334  else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
2335  NonNullIfTrue = false;
2336  else
2337  continue;
2338 
2339  SmallVector<const User *, 4> WorkList;
2340  SmallPtrSet<const User *, 4> Visited;
2341  for (auto *CmpU : U->users()) {
2342  assert(WorkList.empty() && "Should be!");
2343  if (Visited.insert(CmpU).second)
2344  WorkList.push_back(CmpU);
2345 
2346  while (!WorkList.empty()) {
2347  auto *Curr = WorkList.pop_back_val();
2348 
2349  // If a user is an AND, add all its users to the work list. We only
2350  // propagate "pred != null" condition through AND because it is only
2351  // correct to assume that all conditions of AND are met in true branch.
2352  // TODO: Support similar logic of OR and EQ predicate?
2353  if (NonNullIfTrue)
2354  if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
2355  for (auto *CurrU : Curr->users())
2356  if (Visited.insert(CurrU).second)
2357  WorkList.push_back(CurrU);
2358  continue;
2359  }
2360 
2361  if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
2362  assert(BI->isConditional() && "uses a comparison!");
2363 
2364  BasicBlock *NonNullSuccessor =
2365  BI->getSuccessor(NonNullIfTrue ? 0 : 1);
2366  BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
2367  if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
2368  return true;
2369  } else if (NonNullIfTrue && isGuard(Curr) &&
2370  DT->dominates(cast<Instruction>(Curr), CtxI)) {
2371  return true;
2372  }
2373  }
2374  }
2375  }
2376 
2377  return false;
2378 }
2379 
2380 /// Does the 'Range' metadata (which must be a valid MD_range operand list)
2381  /// ensure that the value it's attached to is never equal to 'Value'?
2382  /// 'Value' must have the same type as the value described by the range.
2383 static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2384  const unsigned NumRanges = Ranges->getNumOperands() / 2;
2385  assert(NumRanges >= 1);
2386  for (unsigned i = 0; i < NumRanges; ++i) {
2387  ConstantInt *Lower =
2388  mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
2389  ConstantInt *Upper =
2390  mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
2391  ConstantRange Range(Lower->getValue(), Upper->getValue());
2392  if (Range.contains(Value))
2393  return false;
2394  }
2395  return true;
2396 }
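// Example (hypothetical metadata): a load annotated with
// !range !{i32 1, i32 256} describes the half-open range [1, 256), which
// does not contain 0, so this routine reports that the value 0 is
// excluded.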
2397 
2398 /// Try to detect a recurrence that monotonically increases/decreases from a
2399 /// non-zero starting value. These are common as induction variables.
2400 static bool isNonZeroRecurrence(const PHINode *PN) {
2401  BinaryOperator *BO = nullptr;
2402  Value *Start = nullptr, *Step = nullptr;
2403  const APInt *StartC, *StepC;
2404  if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
2405  !match(Start, m_APInt(StartC)) || StartC->isZero())
2406  return false;
2407 
2408  switch (BO->getOpcode()) {
2409  case Instruction::Add:
2410  // Starting from non-zero and stepping away from zero can never wrap back
2411  // to zero.
2412  return BO->hasNoUnsignedWrap() ||
2413  (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
2414  StartC->isNegative() == StepC->isNegative());
2415  case Instruction::Mul:
2416  return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2417  match(Step, m_APInt(StepC)) && !StepC->isZero();
2418  case Instruction::Shl:
2419  return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2420  case Instruction::AShr:
2421  case Instruction::LShr:
2422  return BO->isExact();
2423  default:
2424  return false;
2425  }
2426 }
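// Example (illustrative IR, hypothetical names):
//   %i = phi i64 [ 1, %entry ], [ %i.next, %loop ]
//   %i.next = add nuw i64 %i, 2
// The start value is non-zero and an add nuw cannot wrap back to zero, so
// the induction variable is non-zero on every iteration.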
2427 
2428 /// Return true if the given value is known to be non-zero when defined. For
2429 /// vectors, return true if every demanded element is known to be non-zero when
2430 /// defined. For pointers, if the context instruction and dominator tree are
2431 /// specified, perform context-sensitive analysis and return true if the
2432 /// pointer couldn't possibly be null at the specified instruction.
2433 /// Supports values with integer or pointer type and vectors of integers.
2434 bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
2435  const Query &Q) {
2436  // FIXME: We currently have no way to represent the DemandedElts of a scalable
2437  // vector
2438  if (isa<ScalableVectorType>(V->getType()))
2439  return false;
2440 
2441  if (auto *C = dyn_cast<Constant>(V)) {
2442  if (C->isNullValue())
2443  return false;
2444  if (isa<ConstantInt>(C))
2445  // Must be non-zero due to null test above.
2446  return true;
2447 
2448  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
2449  // See the comment for IntToPtr/PtrToInt instructions below.
2450  if (CE->getOpcode() == Instruction::IntToPtr ||
2451  CE->getOpcode() == Instruction::PtrToInt)
2452  if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType())
2453  .getFixedSize() <=
2454  Q.DL.getTypeSizeInBits(CE->getType()).getFixedSize())
2455  return isKnownNonZero(CE->getOperand(0), Depth, Q);
2456  }
2457 
2458  // For constant vectors, check that all elements are undefined or known
2459  // non-zero to determine that the whole vector is known non-zero.
2460  if (auto *VecTy = dyn_cast<FixedVectorType>(C->getType())) {
2461  for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
2462  if (!DemandedElts[i])
2463  continue;
2464  Constant *Elt = C->getAggregateElement(i);
2465  if (!Elt || Elt->isNullValue())
2466  return false;
2467  if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt))
2468  return false;
2469  }
2470  return true;
2471  }
2472 
2473  // A global variable in address space 0 is non null unless extern weak
2474  // or an absolute symbol reference. Other address spaces may have null as a
2475  // valid address for a global, so we can't assume anything.
2476  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
2477  if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
2478  GV->getType()->getAddressSpace() == 0)
2479  return true;
2480  } else
2481  return false;
2482  }
2483 
2484  if (auto *I = dyn_cast<Instruction>(V)) {
2485  if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) {
2486  // If the possible ranges don't contain zero, then the value is
2487  // definitely non-zero.
2488  if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
2489  const APInt ZeroValue(Ty->getBitWidth(), 0);
2490  if (rangeMetadataExcludesValue(Ranges, ZeroValue))
2491  return true;
2492  }
2493  }
2494  }
2495 
2496  if (isKnownNonZeroFromAssume(V, Q))
2497  return true;
2498 
2499  // Some of the tests below are recursive, so bail out if we hit the limit.
2500  if (Depth++ >= MaxAnalysisRecursionDepth)
2501  return false;
2502 
2503  // Check for pointer simplifications.
2504 
2505  if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) {
2506  // Alloca never returns null, malloc might.
2507  if (isa<AllocaInst>(V) && Q.DL.getAllocaAddrSpace() == 0)
2508  return true;
2509 
2510  // A byval or inalloca argument is never null, unless null is a valid
2511  // address in its address space. A nonnull argument is assumed never 0.
2512  if (const Argument *A = dyn_cast<Argument>(V)) {
2513  if (((A->hasPassPointeeByValueCopyAttr() &&
2514  !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
2515  A->hasNonNullAttr()))
2516  return true;
2517  }
2518 
2519  // A Load tagged with nonnull metadata is never null.
2520  if (const LoadInst *LI = dyn_cast<LoadInst>(V))
2521  if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull))
2522  return true;
2523 
2524  if (const auto *Call = dyn_cast<CallBase>(V)) {
2525  if (Call->isReturnNonNull())
2526  return true;
2527  if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
2528  return isKnownNonZero(RP, Depth, Q);
2529  }
2530  }
2531 
2532  if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
2533  return true;
2534 
2535  // Check for recursive pointer simplifications.
2536  if (V->getType()->isPointerTy()) {
2537  // Look through bitcast operations, GEPs, and int2ptr instructions as they
2538  // do not alter the value, or at least not the nullness property of the
2539  // value, e.g., int2ptr is allowed to zero/sign extend the value.
2540  //
2541  // Note that we have to take special care to avoid looking through
2542  // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
2543  // as casts that can alter the value, e.g., AddrSpaceCasts.
2544  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
2545  return isGEPKnownNonNull(GEP, Depth, Q);
2546 
2547  if (auto *BCO = dyn_cast<BitCastOperator>(V))
2548  return isKnownNonZero(BCO->getOperand(0), Depth, Q);
2549 
2550  if (auto *I2P = dyn_cast<IntToPtrInst>(V))
2551  if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()).getFixedSize() <=
2552  Q.DL.getTypeSizeInBits(I2P->getDestTy()).getFixedSize())
2553  return isKnownNonZero(I2P->getOperand(0), Depth, Q);
2554  }
2555 
2556  // Similar to int2ptr above, we can look through ptr2int here if the cast
2557  // is a no-op or an extend and not a truncate.
2558  if (auto *P2I = dyn_cast<PtrToIntInst>(V))
2559  if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()).getFixedSize() <=
2560  Q.DL.getTypeSizeInBits(P2I->getDestTy()).getFixedSize())
2561  return isKnownNonZero(P2I->getOperand(0), Depth, Q);
2562 
2563  unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL);
2564 
2565  // X | Y != 0 if X != 0 or Y != 0.
2566  Value *X = nullptr, *Y = nullptr;
2567  if (match(V, m_Or(m_Value(X), m_Value(Y))))
2568  return isKnownNonZero(X, DemandedElts, Depth, Q) ||
2569  isKnownNonZero(Y, DemandedElts, Depth, Q);
2570 
2571  // ext X != 0 if X != 0.
2572  if (isa<SExtInst>(V) || isa<ZExtInst>(V))
2573  return isKnownNonZero(cast<Instruction>(V)->getOperand(0), Depth, Q);
2574 
2575  // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
2576  // if the lowest bit is shifted off the end.
2577  if (match(V, m_Shl(m_Value(X), m_Value(Y)))) {
2578  // shl nuw can't remove any non-zero bits.
2579  const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
2580  if (Q.IIQ.hasNoUnsignedWrap(BO))
2581  return isKnownNonZero(X, Depth, Q);
2582 
2583  KnownBits Known(BitWidth);
2584  computeKnownBits(X, DemandedElts, Known, Depth, Q);
2585  if (Known.One[0])
2586  return true;
2587  }
2588  // shr X, Y != 0 if X is negative. Note that the value of the shift is not
2589  // defined if the sign bit is shifted off the end.
2590  else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
2591  // shr exact can only shift out zero bits.
2592  const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
2593  if (BO->isExact())
2594  return isKnownNonZero(X, Depth, Q);
2595 
2596  KnownBits Known = computeKnownBits(X, DemandedElts, Depth, Q);
2597  if (Known.isNegative())
2598  return true;
2599 
2600  // If the shifter operand is a constant, and all of the bits shifted
2601  // out are known to be zero, and X is known non-zero then at least one
2602  // non-zero bit must remain.
2603  if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
2604  auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
2605  // Is there a known one in the portion not shifted out?
2606  if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal)
2607  return true;
2608  // Are all the bits to be shifted out known zero?
2609  if (Known.countMinTrailingZeros() >= ShiftVal)
2610  return isKnownNonZero(X, DemandedElts, Depth, Q);
2611  }
2612  }
2613  // div exact can only produce a zero if the dividend is zero.
2614  else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
2615  return isKnownNonZero(X, DemandedElts, Depth, Q);
2616  }
2617  // X + Y.
2618  else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
2619  KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2620  KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2621 
2622  // If X and Y are both non-negative (as signed values) then their sum is not
2623  // zero unless both X and Y are zero.
2624  if (XKnown.isNonNegative() && YKnown.isNonNegative())
2625  if (isKnownNonZero(X, DemandedElts, Depth, Q) ||
2626  isKnownNonZero(Y, DemandedElts, Depth, Q))
2627  return true;
2628 
2629  // If X and Y are both negative (as signed values) then their sum is not
2630  // zero unless both X and Y equal INT_MIN.
2631  if (XKnown.isNegative() && YKnown.isNegative()) {
2632  APInt Mask = APInt::getSignedMaxValue(BitWidth);
2633  // The sign bit of X is set. If some other bit is set then X is not equal
2634  // to INT_MIN.
2635  if (XKnown.One.intersects(Mask))
2636  return true;
2637  // The sign bit of Y is set. If some other bit is set then Y is not equal
2638  // to INT_MIN.
2639  if (YKnown.One.intersects(Mask))
2640  return true;
2641  }
2642 
2643  // The sum of a non-negative number and a power of two is not zero.
2644  if (XKnown.isNonNegative() &&
2645  isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
2646  return true;
2647  if (YKnown.isNonNegative() &&
2648  isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
2649  return true;
2650  }
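  // Example (illustrative width): for i8 values X and Y that are both
  // known non-negative, X + Y <= 127 + 127 = 254 and cannot wrap to 0, so
  // the sum is zero only when both X and Y are zero; that is why knowing
  // either one is non-zero suffices above.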
2651  // X * Y.
2652  else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
2653  const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
2654  // If X and Y are non-zero then so is X * Y as long as the multiplication
2655  // does not overflow.
2656  if ((Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) &&
2657  isKnownNonZero(X, DemandedElts, Depth, Q) &&
2658  isKnownNonZero(Y, DemandedElts, Depth, Q))
2659  return true;
2660  }
2661  // (C ? X : Y) != 0 if X != 0 and Y != 0.
2662  else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
2663  if (isKnownNonZero(SI->getTrueValue(), DemandedElts, Depth, Q) &&
2664  isKnownNonZero(SI->getFalseValue(), DemandedElts, Depth, Q))
2665  return true;
2666  }
2667  // PHI
2668  else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
2669  if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
2670  return true;
2671 
2672  // Check if all incoming values are non-zero using recursion.
2673  Query RecQ = Q;
2674  unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2675  return llvm::all_of(PN->operands(), [&](const Use &U) {
2676  if (U.get() == PN)
2677  return true;
2678  RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2679  return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ);
2680  });
2681  }
2682  // ExtractElement
2683  else if (const auto *EEI = dyn_cast<ExtractElementInst>(V)) {
2684  const Value *Vec = EEI->getVectorOperand();
2685  const Value *Idx = EEI->getIndexOperand();
2686  auto *CIdx = dyn_cast<ConstantInt>(Idx);
2687  if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
2688  unsigned NumElts = VecTy->getNumElements();
2689  APInt DemandedVecElts = APInt::getAllOnes(NumElts);
2690  if (CIdx && CIdx->getValue().ult(NumElts))
2691  DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
2692  return isKnownNonZero(Vec, DemandedVecElts, Depth, Q);
2693  }
2694  }
2695  // Freeze
2696  else if (const FreezeInst *FI = dyn_cast<FreezeInst>(V)) {
2697  auto *Op = FI->getOperand(0);
2698  if (isKnownNonZero(Op, Depth, Q) &&
2699  isGuaranteedNotToBePoison(Op, Q.AC, Q.CxtI, Q.DT, Depth))
2700  return true;
2701  } else if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
2702  if (II->getIntrinsicID() == Intrinsic::vscale)
2703  return true;
2704  }
2705 
2706  KnownBits Known(BitWidth);
2707  computeKnownBits(V, DemandedElts, Known, Depth, Q);
2708  return Known.One != 0;
2709 }
2710 
2711 bool isKnownNonZero(const Value* V, unsigned Depth, const Query& Q) {
2712  // FIXME: We currently have no way to represent the DemandedElts of a scalable
2713  // vector
2714  if (isa<ScalableVectorType>(V->getType()))
2715  return false;
2716 
2717  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
2718  APInt DemandedElts =
2719  FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
2720  return isKnownNonZero(V, DemandedElts, Depth, Q);
2721 }
2722 
2723 /// If the pair of operators are the same invertible function, return the
2724  /// operands of the function corresponding to each input. Otherwise,
2725 /// return None. An invertible function is one that is 1-to-1 and maps
2726 /// every input value to exactly one output value. This is equivalent to
2727 /// saying that Op1 and Op2 are equal exactly when the specified pair of
2728 /// operands are equal, (except that Op1 and Op2 may be poison more often.)
2729 static Optional<std::pair<Value*, Value*>>
2730 getInvertibleOperands(const Operator *Op1,
2731  const Operator *Op2) {
2732  if (Op1->getOpcode() != Op2->getOpcode())
2733  return None;
2734 
2735  auto getOperands = [&](unsigned OpNum) -> auto {
2736  return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
2737  };
2738 
2739  switch (Op1->getOpcode()) {
2740  default:
2741  break;
2742  case Instruction::Add:
2743  case Instruction::Sub:
2744  if (Op1->getOperand(0) == Op2->getOperand(0))
2745  return getOperands(1);
2746  if (Op1->getOperand(1) == Op2->getOperand(1))
2747  return getOperands(0);
2748  break;
2749  case Instruction::Mul: {
2750  // invertible if A * B == (A * B) mod 2^N where A and B are integers
2751  // and N is the bitwidth. The nsw case is non-obvious, but proven by
2752  // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
2753  auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
2754  auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
2755  if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
2756  (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
2757  break;
2758 
2759  // Assume operand order has been canonicalized
2760  if (Op1->getOperand(1) == Op2->getOperand(1) &&
2761  isa<ConstantInt>(Op1->getOperand(1)) &&
2762  !cast<ConstantInt>(Op1->getOperand(1))->isZero())
2763  return getOperands(0);
2764  break;
2765  }
2766  case Instruction::Shl: {
2767  // Same as multiplies, with the difference that we don't need to check
2768  // for a non-zero multiply. Shifts always multiply by non-zero.
2769  auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
2770  auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
2771  if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
2772  (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
2773  break;
2774 
2775  if (Op1->getOperand(1) == Op2->getOperand(1))
2776  return getOperands(0);
2777  break;
2778  }
2779  case Instruction::AShr:
2780  case Instruction::LShr: {
2781  auto *PEO1 = cast<PossiblyExactOperator>(Op1);
2782  auto *PEO2 = cast<PossiblyExactOperator>(Op2);
2783  if (!PEO1->isExact() || !PEO2->isExact())
2784  break;
2785 
2786  if (Op1->getOperand(1) == Op2->getOperand(1))
2787  return getOperands(0);
2788  break;
2789  }
2790  case Instruction::SExt:
2791  case Instruction::ZExt:
2792  if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
2793  return getOperands(0);
2794  break;
2795  case Instruction::PHI: {
2796  const PHINode *PN1 = cast<PHINode>(Op1);
2797  const PHINode *PN2 = cast<PHINode>(Op2);
2798 
2799  // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
2800  // are a single invertible function of the start values? Note that repeated
2801  // application of an invertible function is also invertible
2802  BinaryOperator *BO1 = nullptr;
2803  Value *Start1 = nullptr, *Step1 = nullptr;
2804  BinaryOperator *BO2 = nullptr;
2805  Value *Start2 = nullptr, *Step2 = nullptr;
2806  if (PN1->getParent() != PN2->getParent() ||
2807  !matchSimpleRecurrence(PN1, BO1, Start1, Step1) ||
2808  !matchSimpleRecurrence(PN2, BO2, Start2, Step2))
2809  break;
2810 
2811  auto Values = getInvertibleOperands(cast<Operator>(BO1),
2812  cast<Operator>(BO2));
2813  if (!Values)
2814  break;
2815 
2816  // We have to be careful of mutually defined recurrences here. Ex:
2817  // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
2818  // * X_i = Y_i = X_(i-1) OP Y_(i-1)
2819  // The invertibility of these is complicated, and not worth reasoning
2820  // about (yet?).
2821  if (Values->first != PN1 || Values->second != PN2)
2822  break;
2823 
2824  return std::make_pair(Start1, Start2);
2825  }
2826  }
2827  return None;
2828 }
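// Example (hypothetical values): for %a = add i32 %x, %c and
// %b = add i32 %y, %c, the Add case above returns the pair (%x, %y):
// adding the same %c is invertible, so %a == %b exactly when %x == %y.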
2829 
2830 /// Return true if V2 == V1 + X, where X is known non-zero.
2831 static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth,
2832  const Query &Q) {
2833  const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
2834  if (!BO || BO->getOpcode() != Instruction::Add)
2835  return false;
2836  Value *Op = nullptr;
2837  if (V2 == BO->getOperand(0))
2838  Op = BO->getOperand(1);
2839  else if (V2 == BO->getOperand(1))
2840  Op = BO->getOperand(0);
2841  else
2842  return false;
2843  return isKnownNonZero(Op, Depth + 1, Q);
2844 }
2845 
2846 /// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
2847 /// the multiplication is nuw or nsw.
2848 static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth,
2849  const Query &Q) {
2850  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
2851  const APInt *C;
2852  return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
2853  (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
2854  !C->isZero() && !C->isOne() && isKnownNonZero(V1, Depth + 1, Q);
2855  }
2856  return false;
2857 }
2858 
2859 /// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
2860 /// the shift is nuw or nsw.
2861 static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth,
2862  const Query &Q) {
2863  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
2864  const APInt *C;
2865  return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
2866  (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
2867  !C->isZero() && isKnownNonZero(V1, Depth + 1, Q);
2868  }
2869  return false;
2870 }
2871 
2872 static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
2873  unsigned Depth, const Query &Q) {
2874  // Check that the two PHIs are in the same block.
2875  if (PN1->getParent() != PN2->getParent())
2876  return false;
2877 
2878  SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
2879  bool UsedFullRecursion = false;
2880  for (const BasicBlock *IncomBB : PN1->blocks()) {
2881  if (!VisitedBBs.insert(IncomBB).second)
2882  continue; // Don't reprocess blocks that we have dealt with already.
2883  const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
2884  const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
2885  const APInt *C1, *C2;
2886  if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2)
2887  continue;
2888 
2889  // Only one pair of phi operands is allowed for full recursion.
2890  if (UsedFullRecursion)
2891  return false;
2892 
2893  Query RecQ = Q;
2894  RecQ.CxtI = IncomBB->getTerminator();
2895  if (!isKnownNonEqual(IV1, IV2, Depth + 1, RecQ))
2896  return false;
2897  UsedFullRecursion = true;
2898  }
2899  return true;
2900 }
2901 
2902 /// Return true if it is known that V1 != V2.
2903 static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
2904  const Query &Q) {
2905  if (V1 == V2)
2906  return false;
2907  if (V1->getType() != V2->getType())
2908  // We can't look through casts yet.
2909  return false;
2910 
2911  if (Depth >= MaxAnalysisRecursionDepth)
2912  return false;
2913 
2914  // See if we can recurse through (exactly one of) our operands. This
2915  // requires our operation be 1-to-1 and map every input value to exactly
2916  // one output value. Such an operation is invertible.
2917  auto *O1 = dyn_cast<Operator>(V1);
2918  auto *O2 = dyn_cast<Operator>(V2);
2919  if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
2920  if (auto Values = getInvertibleOperands(O1, O2))
2921  return isKnownNonEqual(Values->first, Values->second, Depth + 1, Q);
2922 
2923  if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
2924  const PHINode *PN2 = cast<PHINode>(V2);
2925  // FIXME: This is missing a generalization to handle the case where one is
2926  // a PHI and another one isn't.
2927  if (isNonEqualPHIs(PN1, PN2, Depth, Q))
2928  return true;
2929  };
2930  }
2931 
2932  if (isAddOfNonZero(V1, V2, Depth, Q) || isAddOfNonZero(V2, V1, Depth, Q))
2933  return true;
2934 
2935  if (isNonEqualMul(V1, V2, Depth, Q) || isNonEqualMul(V2, V1, Depth, Q))
2936  return true;
2937 
2938  if (isNonEqualShl(V1, V2, Depth, Q) || isNonEqualShl(V2, V1, Depth, Q))
2939  return true;
2940 
2941  if (V1->getType()->isIntOrIntVectorTy()) {
2942  // Are any known bits in V1 contradictory to known bits in V2? If V1
2943  // has a known zero where V2 has a known one, they must not be equal.
2944  KnownBits Known1 = computeKnownBits(V1, Depth, Q);
2945  KnownBits Known2 = computeKnownBits(V2, Depth, Q);
2946 
2947  if (Known1.Zero.intersects(Known2.One) ||
2948  Known2.Zero.intersects(Known1.One))
2949  return true;
2950  }
2951  return false;
2952 }
2953 
2954 /// Return true if 'V & Mask' is known to be zero. We use this predicate to
2955 /// simplify operations downstream. Mask is known to be zero for bits that V
2956 /// cannot have.
2957 ///
2958 /// This function is defined on values with integer type, values with pointer
2959 /// type, and vectors of integers. In the case
2960 /// where V is a vector, the mask, known zero, and known one values are the
2961 /// same width as the vector element, and the bit is set only if it is true
2962 /// for all of the elements in the vector.
2963 bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
2964  const Query &Q) {
2965  KnownBits Known(Mask.getBitWidth());
2966  computeKnownBits(V, Known, Depth, Q);
2967  return Mask.isSubsetOf(Known.Zero);
2968 }
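// Illustrative use of MaskedValueIsZero (hypothetical caller; the Query and
// bit width come from whatever analysis is driving it): checking that the low
// four bits of a value are known clear, e.g. to prove a 16-byte-aligned offset.
//
//   APInt LowBits = APInt::getLowBitsSet(BitWidth, 4);
//   if (MaskedValueIsZero(V, LowBits, /*Depth=*/0, Q))
//     ...; // V is known to be a multiple of 16.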
2969 
2970 // Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
2971 // Returns the input and lower/upper bounds.
2972 static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
2973  const APInt *&CLow, const APInt *&CHigh) {
2974  assert(isa<Operator>(Select) &&
2975  cast<Operator>(Select)->getOpcode() == Instruction::Select &&
2976  "Input should be a Select!");
2977 
2978  const Value *LHS = nullptr, *RHS = nullptr;
2979  SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
2980  if (SPF != SPF_SMAX && SPF != SPF_SMIN)
2981  return false;
2982 
2983  if (!match(RHS, m_APInt(CLow)))
2984  return false;
2985 
2986  const Value *LHS2 = nullptr, *RHS2 = nullptr;
2987  SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
2988  if (getInverseMinMaxFlavor(SPF) != SPF2)
2989  return false;
2990 
2991  if (!match(RHS2, m_APInt(CHigh)))
2992  return false;
2993 
2994  if (SPF == SPF_SMIN)
2995  std::swap(CLow, CHigh);
2996 
2997  In = LHS2;
2998  return CLow->sle(*CHigh);
2999 }
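// Illustrative IR for the select-based clamp matched above (made-up values):
//
//   %cmp1 = icmp slt i32 %x, 255
//   %min  = select i1 %cmp1, i32 %x, i32 255    ; smin(%x, 255) -> CHigh = 255
//   %cmp2 = icmp sgt i32 %min, 0
//   %sel  = select i1 %cmp2, i32 %min, i32 0    ; smax(%min, 0) -> CLow = 0
//
// For %sel this sets In = %x, CLow = 0, CHigh = 255; the reversed nesting
// (smin of an smax) is handled by the swap above.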
3000 
3001 static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
3002  const APInt *&CLow,
3003  const APInt *&CHigh) {
3004  assert((II->getIntrinsicID() == Intrinsic::smin ||
3005  II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax");
3006 
3007  Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
3008  auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
3009  if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
3010  !match(II->getArgOperand(1), m_APInt(CLow)) ||
3011  !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
3012  return false;
3013 
3014  if (II->getIntrinsicID() == Intrinsic::smin)
3015  std::swap(CLow, CHigh);
3016  return CLow->sle(*CHigh);
3017 }
3018 
3019 /// For vector constants, loop over the elements and find the constant with the
3020 /// minimum number of sign bits. Return 0 if the value is not a vector constant
3021 /// or if any element was not analyzed; otherwise, return the count for the
3022 /// element with the minimum number of sign bits.
3023 static unsigned computeNumSignBitsVectorConstant(const Value *V,
3024  const APInt &DemandedElts,
3025  unsigned TyBits) {
3026  const auto *CV = dyn_cast<Constant>(V);
3027  if (!CV || !isa<FixedVectorType>(CV->getType()))
3028  return 0;
3029 
3030  unsigned MinSignBits = TyBits;
3031  unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements();
3032  for (unsigned i = 0; i != NumElts; ++i) {
3033  if (!DemandedElts[i])
3034  continue;
3035  // If we find a non-ConstantInt, bail out.
3036  auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
3037  if (!Elt)
3038  return 0;
3039 
3040  MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
3041  }
3042 
3043  return MinSignBits;
3044 }
3045 
3046 static unsigned ComputeNumSignBitsImpl(const Value *V,
3047  const APInt &DemandedElts,
3048  unsigned Depth, const Query &Q);
3049 
3050 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3051  unsigned Depth, const Query &Q) {
3052  unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
3053  assert(Result > 0 && "At least one sign bit needs to be present!");
3054  return Result;
3055 }
3056 
3057 /// Return the number of times the sign bit of the register is replicated into
3058 /// the other bits. We know that at least 1 bit is always equal to the sign bit
3059 /// (itself), but other cases can give us information. For example, immediately
3060 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each
3061 /// other, so we return 3. For vectors, return the number of sign bits for the
3062 /// vector element with the minimum number of known sign bits of the demanded
3063 /// elements in the vector specified by DemandedElts.
3064 static unsigned ComputeNumSignBitsImpl(const Value *V,
3065  const APInt &DemandedElts,
3066  unsigned Depth, const Query &Q) {
3067  Type *Ty = V->getType();
3068 
3069  // FIXME: We currently have no way to represent the DemandedElts of a scalable
3070  // vector
3071  if (isa<ScalableVectorType>(Ty))
3072  return 1;
3073 
3074 #ifndef NDEBUG
3075  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3076 
3077  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3078  assert(
3079  FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3080  "DemandedElt width should equal the fixed vector number of elements");
3081  } else {
3082  assert(DemandedElts == APInt(1, 1) &&
3083  "DemandedElt width should be 1 for scalars");
3084  }
3085 #endif
3086 
3087  // We return the minimum number of sign bits that are guaranteed to be present
3088  // in V, so for undef we have to conservatively return 1. We don't have the
3089  // same behavior for poison though -- that's a FIXME today.
3090 
3091  Type *ScalarTy = Ty->getScalarType();
3092  unsigned TyBits = ScalarTy->isPointerTy() ?
3093  Q.DL.getPointerTypeSizeInBits(ScalarTy) :
3094  Q.DL.getTypeSizeInBits(ScalarTy);
3095 
3096  unsigned Tmp, Tmp2;
3097  unsigned FirstAnswer = 1;
3098 
3099  // Note that ConstantInt is handled by the general computeKnownBits case
3100  // below.
3101 
3102  if (Depth == MaxAnalysisRecursionDepth)
3103  return 1;
3104 
3105  if (auto *U = dyn_cast<Operator>(V)) {
3106  switch (Operator::getOpcode(V)) {
3107  default: break;
3108  case Instruction::SExt:
3109  Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
3110  return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;
3111 
3112  case Instruction::SDiv: {
3113  const APInt *Denominator;
3114  // sdiv X, C -> adds log(C) sign bits.
3115  if (match(U->getOperand(1), m_APInt(Denominator))) {
3116 
3117  // Ignore non-positive denominator.
3118  if (!Denominator->isStrictlyPositive())
3119  break;
3120 
3121  // Calculate the incoming numerator bits.
3122  unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3123 
3124  // Add floor(log(C)) bits to the numerator bits.
3125  return std::min(TyBits, NumBits + Denominator->logBase2());
3126  }
3127  break;
3128  }
3129 
3130  case Instruction::SRem: {
3131  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3132 
3133  const APInt *Denominator;
3134  // srem X, C -> we know that the result is within [-C+1,C) when C is a
3135  // positive constant. This lets us put a lower bound on the number of sign
3136  // bits.
3137  if (match(U->getOperand(1), m_APInt(Denominator))) {
3138 
3139  // Ignore non-positive denominator.
3140  if (Denominator->isStrictlyPositive()) {
3141  // Calculate the leading sign bit constraints by examining the
3142  // denominator. Given that the denominator is positive, there are two
3143  // cases:
3144  //
3145  // 1. The numerator is positive. The result range is [0,C) and
3146  // [0,C) u< (1 << ceilLogBase2(C)).
3147  //
3148  // 2. The numerator is negative. Then the result range is (-C,0] and
3149  // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
3150  //
3151  // Thus a lower bound on the number of sign bits is `TyBits -
3152  // ceilLogBase2(C)`.
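 // Worked example (illustrative): for `srem i32 %x, 10`,
 // ceilLogBase2(10) == 4, so the result lies in (-10, 10) and is
 // guaranteed at least 32 - 4 = 28 sign bits.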
3153 
3154  unsigned ResBits = TyBits - Denominator->ceilLogBase2();
3155  Tmp = std::max(Tmp, ResBits);
3156  }
3157  }
3158  return Tmp;
3159  }
3160 
3161  case Instruction::AShr: {
3162  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3163  // ashr X, C -> adds C sign bits. Vectors too.
3164  const APInt *ShAmt;
3165  if (match(U->getOperand(1), m_APInt(ShAmt))) {
3166  if (ShAmt->uge(TyBits))
3167  break; // Bad shift.
3168  unsigned ShAmtLimited = ShAmt->getZExtValue();
3169  Tmp += ShAmtLimited;
3170  if (Tmp > TyBits) Tmp = TyBits;
3171  }
3172  return Tmp;
3173  }
3174  case Instruction::Shl: {
3175  const APInt *ShAmt;
3176  if (match(U->getOperand(1), m_APInt(ShAmt))) {
3177  // shl destroys sign bits.
3178  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3179  if (ShAmt->uge(TyBits) || // Bad shift.
3180  ShAmt->uge(Tmp)) break; // Shifted all sign bits out.
3181  Tmp2 = ShAmt->getZExtValue();
3182  return Tmp - Tmp2;
3183  }
3184  break;
3185  }
3186  case Instruction::And:
3187  case Instruction::Or:
3188  case Instruction::Xor: // NOT is handled here.
3189  // Logical binary ops preserve, at worst, the minimum number of sign bits
3189  // of their operands.
3190  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3191  if (Tmp != 1) {
3192  Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3193  FirstAnswer = std::min(Tmp, Tmp2);
3194  // We computed what we know about the sign bits as our first
3195  // answer. Now proceed to the generic code that uses
3196  // computeKnownBits, and pick whichever answer is better.
3197  }
3198  break;
3199 
3200  case Instruction::Select: {
3201  // If we have a clamp pattern, we know that the number of sign bits will
3202  // be the minimum of the clamp min/max range.
3203  const Value *X;
3204  const APInt *CLow, *CHigh;
3205  if (isSignedMinMaxClamp(U, X, CLow, CHigh))
3206  return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
3207 
3208  Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3209  if (Tmp == 1) break;
3210  Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
3211  return std::min(Tmp, Tmp2);
3212  }
3213 
3214  case Instruction::Add:
3215  // Add can have at most one carry bit. Thus we know that the output
3216  // is, at worst, one more bit than the inputs.
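 // Worked example (illustrative): two i8 values each with at least 3 sign
 // bits lie in [-32, 31]; their sum lies in [-64, 62], which still has at
 // least 2 sign bits -- hence the `std::min(Tmp, Tmp2) - 1` below.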
3217  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3218  if (Tmp == 1) break;
3219 
3220  // Special case decrementing a value (ADD X, -1):
3221  if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
3222  if (CRHS->isAllOnesValue()) {
3223  KnownBits Known(TyBits);
3224  computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
3225 
3226  // If the input is known to be 0 or 1, the output is 0/-1, which is
3227  // all sign bits set.
3228  if ((Known.Zero | 1).isAllOnes())
3229  return TyBits;
3230 
3231  // If we are subtracting one from a positive number, there is no carry
3232  // out of the result.
3233  if (Known.isNonNegative())
3234  return Tmp;
3235  }
3236 
3237  Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3238  if (Tmp2 == 1) break;
3239  return std::min(Tmp, Tmp2) - 1;
3240 
3241  case Instruction::Sub:
3242  Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3243  if (Tmp2 == 1) break;
3244 
3245  // Handle NEG.
3246  if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
3247  if (CLHS->isNullValue()) {
3248  KnownBits Known(TyBits);
3249  computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
3250  // If the input is known to be 0 or 1, the output is 0/-1, which is
3251  // all sign bits set.
3252  if ((Known.Zero | 1).isAllOnes())
3253  return TyBits;
3254 
3255  // If the input is known to be positive (the sign bit is known clear),
3256  // the output of the NEG has the same number of sign bits as the
3257  // input.
3258  if (Known.isNonNegative())
3259  return Tmp2;
3260 
3261  // Otherwise, we treat this like a SUB.
3262  }
3263 
3264  // Sub can have at most one carry bit. Thus we know that the output
3265  // is, at worst, one more bit than the inputs.
3266  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3267  if (Tmp == 1) break;
3268  return std::min(Tmp, Tmp2) - 1;
3269 
3270  case Instruction::Mul: {
3271  // The output of the Mul can be at most twice the valid bits in the
3272  // inputs.
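 // Worked example (illustrative): two i32 operands with 17 sign bits each
 // have 16 valid (non-redundant) bits each, so the product may need 32 valid
 // bits and only the single sign bit is guaranteed.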
3273  unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3274  if (SignBitsOp0 == 1) break;
3275  unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3276  if (SignBitsOp1 == 1) break;
3277  unsigned OutValidBits =
3278  (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
3279  return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
3280  }
3281 
3282  case Instruction::PHI: {
3283  const PHINode *PN = cast<PHINode>(U);
3284  unsigned NumIncomingValues = PN->getNumIncomingValues();
3285  // Don't analyze large in-degree PHIs.
3286  if (NumIncomingValues > 4) break;
3287  // Unreachable blocks may have zero-operand PHI nodes.
3288  if (NumIncomingValues == 0) break;
3289 
3290  // Take the minimum of all incoming values. This can't infinitely loop
3291  // because of our depth threshold.
3292  Query RecQ = Q;
3293  Tmp = TyBits;
3294  for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
3295  if (Tmp == 1) return Tmp;
3296  RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
3297  Tmp = std::min(
3298  Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, RecQ));
3299  }
3300  return Tmp;
3301  }
3302 
3303  case Instruction::Trunc:
3304  // FIXME: it's tricky to do anything useful for this, but it is an
3305  // important case for targets like X86.
3306  break;
3307 
3308  case Instruction::ExtractElement:
3309  // Look through extract element. At the moment we keep this simple and
3310  // skip tracking the specific element. But at least we might find
3311  // information valid for all elements of the vector (for example if the
3312  // vector is sign extended, shifted, etc).
3313  return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3314 
3315  case Instruction::ShuffleVector: {
3316  // Collect the minimum number of sign bits that are shared by every vector
3317  // element referenced by the shuffle.
3318  auto *Shuf = dyn_cast<ShuffleVectorInst>(U);
3319  if (!Shuf) {
3320  // FIXME: Add support for shufflevector constant expressions.
3321  return 1;
3322  }
3323  APInt DemandedLHS, DemandedRHS;
3324  // For undef elements, we don't know anything about the common state of
3325  // the shuffle result.
3326  if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
3327  return 1;
3328  Tmp = std::numeric_limits<unsigned>::max();
3329  if (!!DemandedLHS) {
3330  const Value *LHS = Shuf->getOperand(0);
3331  Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q);
3332  }
3333  // If we don't know anything, early out and try computeKnownBits
3334  // fall-back.
3335  if (Tmp == 1)
3336  break;
3337  if (!!DemandedRHS) {
3338  const Value *RHS = Shuf->getOperand(1);
3339  Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q);
3340  Tmp = std::min(Tmp, Tmp2);
3341  }
3342  // If we don't know anything, early out and try computeKnownBits
3343  // fall-back.
3344  if (Tmp == 1)
3345  break;
3346  assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
3347  return Tmp;
3348  }
3349  case Instruction::Call: {
3350  if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
3351  switch (II->getIntrinsicID()) {
3352  default: break;
3353  case Intrinsic::abs:
3354  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3355  if (Tmp == 1) break;
3356 
3357  // Absolute value reduces number of sign bits by at most 1.
3358  return Tmp - 1;
3359  case Intrinsic::smin:
3360  case Intrinsic::smax: {
3361  const APInt *CLow, *CHigh;
3362  if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
3363  return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
3364  }
3365  }
3366  }
3367  }
3368  }
3369  }
3370 
3371  // Finally, if we can prove that the top bits of the result are 0's or 1's,
3372  // use this information.
3373 
3374  // If we can examine all elements of a vector constant successfully, we're
3375  // done (we can't do any better than that). If not, keep trying.
3376  if (unsigned VecSignBits =
3377  computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
3378  return VecSignBits;
3379 
3380  KnownBits Known(TyBits);
3381  computeKnownBits(V, DemandedElts, Known, Depth, Q);
3382 
3383  // If we know that the sign bit is either zero or one, determine the number of
3384  // identical bits in the top of the input value.
3385  return std::max(FirstAnswer, Known.countMinSignBits());
3386 }
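// Illustrative consequence (hypothetical caller): if ComputeNumSignBits
// reports at least 17 sign bits for an i32 value %x, then %x is representable
// in i16 and `sext (trunc %x to i16) to i32` round-trips it unchanged, which
// is the kind of narrowing that transforms built on this analysis rely on.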
3387 
3388 Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
3389  const TargetLibraryInfo *TLI) {
3390  const Function *F = CB.getCalledFunction();
3391  if (!F)
3392  return Intrinsic::not_intrinsic;
3393 
3394  if (F->isIntrinsic())
3395  return F->getIntrinsicID();
3396 
3397  // We are going to infer semantics of a library function based on mapping it
3398  // to an LLVM intrinsic. Check that the library function is available from
3399  // this callbase and in this environment.
3400  LibFunc Func;
3401  if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) ||
3402  !CB.onlyReadsMemory())
3403  return Intrinsic::not_intrinsic;
3404 
3405  switch (Func) {
3406  default:
3407  break;
3408  case LibFunc_sin:
3409  case LibFunc_sinf:
3410  case LibFunc_sinl:
3411  return Intrinsic::sin;
3412  case LibFunc_cos:
3413  case LibFunc_cosf:
3414  case LibFunc_cosl:
3415  return Intrinsic::cos;
3416  case LibFunc_exp:
3417  case LibFunc_expf:
3418  case LibFunc_expl:
3419  return Intrinsic::exp;
3420  case LibFunc_exp2:
3421  case LibFunc_exp2f:
3422  case LibFunc_exp2l:
3423  return Intrinsic::exp2;
3424  case LibFunc_log:
3425  case LibFunc_logf:
3426  case LibFunc_logl:
3427  return Intrinsic::log;
3428  case LibFunc_log10:
3429  case LibFunc_log10f:
3430  case LibFunc_log10l:
3431  return Intrinsic::log10;
3432  case LibFunc_log2:
3433  case LibFunc_log2f:
3434  case LibFunc_log2l:
3435  return Intrinsic::log2;
3436  case LibFunc_fabs:
3437  case LibFunc_fabsf:
3438  case LibFunc_fabsl:
3439  return Intrinsic::fabs;
3440  case LibFunc_fmin:
3441  case LibFunc_fminf:
3442  case LibFunc_fminl:
3443  return Intrinsic::minnum;
3444  case LibFunc_fmax:
3445  case LibFunc_fmaxf:
3446  case LibFunc_fmaxl:
3447  return Intrinsic::maxnum;
3448  case LibFunc_copysign:
3449  case LibFunc_copysignf:
3450  case LibFunc_copysignl:
3451  return Intrinsic::copysign;
3452  case LibFunc_floor:
3453  case LibFunc_floorf:
3454  case LibFunc_floorl:
3455  return Intrinsic::floor;
3456  case LibFunc_ceil:
3457  case LibFunc_ceilf:
3458  case LibFunc_ceill:
3459  return Intrinsic::ceil;
3460  case LibFunc_trunc:
3461  case LibFunc_truncf:
3462  case LibFunc_truncl:
3463  return Intrinsic::trunc;
3464  case LibFunc_rint:
3465  case LibFunc_rintf:
3466  case LibFunc_rintl:
3467  return Intrinsic::rint;
3468  case LibFunc_nearbyint:
3469  case LibFunc_nearbyintf:
3470  case LibFunc_nearbyintl:
3471  return Intrinsic::nearbyint;
3472  case LibFunc_round:
3473  case LibFunc_roundf:
3474  case LibFunc_roundl:
3475  return Intrinsic::round;
3476  case LibFunc_roundeven:
3477  case LibFunc_roundevenf:
3478  case LibFunc_roundevenl:
3479  return Intrinsic::roundeven;
3480  case LibFunc_pow:
3481  case LibFunc_powf:
3482  case LibFunc_powl:
3483  return Intrinsic::pow;
3484  case LibFunc_sqrt:
3485  case LibFunc_sqrtf:
3486  case LibFunc_sqrtl:
3487  return Intrinsic::sqrt;
3488  }
3489 
3490  return Intrinsic::not_intrinsic;
3491 }
3492 
3493 /// Return true if we can prove that the specified FP value is never equal to
3494 /// -0.0.
3495 /// NOTE: Do not check 'nsz' here because that fast-math-flag does not guarantee
3496 /// that a value is not -0.0. It only guarantees that -0.0 may be treated
3497 /// the same as +0.0 in floating-point ops.
3498 bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
3499  unsigned Depth) {
3500  if (auto *CFP = dyn_cast<ConstantFP>(V))
3501  return !CFP->getValueAPF().isNegZero();
3502 
3503  if (Depth == MaxAnalysisRecursionDepth)
3504  return false;
3505 
3506  auto *Op = dyn_cast<Operator>(V);
3507  if (!Op)
3508  return false;
3509 
3510  // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
3511  if (match(Op, m_FAdd(m_Value(), m_PosZeroFP())))
3512  return true;
3513 
3514  // sitofp and uitofp turn into +0.0 for zero.
3515  if (isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op))
3516  return true;
3517 
3518  if (auto *Call = dyn_cast<CallInst>(Op)) {
3519  Intrinsic::ID IID = getIntrinsicForCallSite(*Call, TLI);
3520  switch (IID) {
3521  default:
3522  break;
3523  // sqrt(-0.0) = -0.0, no other negative results are possible.
3524  case Intrinsic::sqrt:
3525  case Intrinsic::canonicalize:
3526  return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
3527  case Intrinsic::experimental_constrained_sqrt: {
3528  // NOTE: This rounding mode restriction may be too strict.
3529  const auto *CI = cast<ConstrainedFPIntrinsic>(Call);
3530  if (CI->getRoundingMode() == RoundingMode::NearestTiesToEven)
3531  return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
3532  else
3533  return false;
3534  }
3535  // fabs(x) != -0.0
3536  case Intrinsic::fabs:
3537  return true;
3538  // sitofp and uitofp turn into +0.0 for zero.
3539  case Intrinsic::experimental_constrained_sitofp:
3540  case Intrinsic::experimental_constrained_uitofp:
3541  return true;
3542  }
3543  }
3544 
3545  return false;
3546 }
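// Illustrative example (made-up IR): `%r = fadd float %x, 0.0` can never
// produce -0.0 -- even when %x is -0.0 the sum rounds to +0.0 under the
// default rounding mode -- so the m_FAdd(m_Value(), m_PosZeroFP()) case above
// returns true for %r.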
3547 
3548 /// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
3549 /// standard ordered compare, e.g. treat -0.0 olt 0.0 as true because of the
3550 /// sign bit even though the two values compare equal.
3551 static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
3552  const TargetLibraryInfo *TLI,
3553  bool SignBitOnly,
3554  unsigned Depth) {
3555  // TODO: This function does not do the right thing when SignBitOnly is true
3556  // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform
3557  // which flips the sign bits of NaNs. See
3558  // https://llvm.org/bugs/show_bug.cgi?id=31702.
3559 
3560  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
3561  return !CFP->getValueAPF().isNegative() ||
3562  (!SignBitOnly && CFP->getValueAPF().isZero());
3563  }
3564 
3565  // Handle vector of constants.
3566  if (auto *CV = dyn_cast<Constant>(V)) {
3567  if (auto *CVFVTy = dyn_cast<FixedVectorType>(CV->getType())) {
3568  unsigned NumElts = CVFVTy->getNumElements();
3569  for (unsigned i = 0; i != NumElts; ++i) {
3570  auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
3571  if (!CFP)
3572  return false;
3573  if (CFP->getValueAPF().isNegative() &&
3574  (SignBitOnly || !CFP->getValueAPF().isZero()))
3575  return false;
3576  }
3577 
3578  // All non-negative ConstantFPs.
3579  return true;
3580  }
3581  }
3582 
3583  if (Depth == MaxAnalysisRecursionDepth)
3584  return false;
3585 
3586  const Operator *I = dyn_cast<Operator>(V);
3587  if (!I)
3588  return false;
3589 
3590  switch (I->getOpcode()) {
3591  default:
3592  break;
3593  // Unsigned integers are always nonnegative.
3594  case Instruction::UIToFP:
3595  return true;
3596  case Instruction::FMul:
3597  case Instruction::FDiv:
3598  // X * X is always non-negative or a NaN.
3599  // X / X is always exactly 1.0 or a NaN.
3600  if (I->getOperand(0) == I->getOperand(1) &&
3601  (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
3602  return true;
3603 
3604  LLVM_FALLTHROUGH;
3605  case Instruction::FAdd:
3606  case Instruction::FRem:
3607  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3608  Depth + 1) &&
3609  cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
3610  Depth + 1);
3611  case Instruction::Select:
3612  return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
3613  Depth + 1) &&
3614  cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
3615  Depth + 1);
3616  case Instruction::FPExt:
3617  case Instruction::FPTrunc:
3618  // Widening/narrowing never change sign.
3619  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3620  Depth + 1);
3621  case Instruction::ExtractElement:
3622  // Look through extract element. At the moment we keep this simple and skip
3623  // tracking the specific element. But at least we might find information
3624  // valid for all elements of the vector.
3625  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3626  Depth + 1);
3627  case Instruction::Call:
3628  const auto *CI = cast<CallInst>(I);
3629  Intrinsic::ID IID = getIntrinsicForCallSite(*CI, TLI);
3630  switch (IID) {
3631  default:
3632  break;
3633  case Intrinsic::maxnum: {
3634  Value *V0 = I->getOperand(0), *V1 = I->getOperand(1);
3635  auto isPositiveNum = [&](Value *V) {
3636  if (SignBitOnly) {
3637  // With SignBitOnly, this is tricky because the result of
3638  // maxnum(+0.0, -0.0) is unspecified. Just check if the operand is
3639  // a constant strictly greater than 0.0.
3640  const APFloat *C;
3641  return match(V, m_APFloat(C)) &&
3642  *C > APFloat::getZero(C->getSemantics());
3643  }
3644 
3645  // -0.0 compares equal to 0.0, so if this operand is at least -0.0,
3646  // maxnum can't be ordered-less-than-zero.
3647  return isKnownNeverNaN(V, TLI) &&
3648  cannotBeOrderedLessThanZeroImpl(V, TLI, false, Depth + 1);
3649  };
3650 
3651  // TODO: This could be improved. We could also check that neither operand
3652  // has its sign bit set (and at least 1 is not-NAN?).
3653  return isPositiveNum(V0) || isPositiveNum(V1);
3654  }
3655 
3656  case Intrinsic::maximum:
3657  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3658  Depth + 1) ||
3659  cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
3660  Depth + 1);
3661  case Intrinsic::minnum:
3662  case Intrinsic::minimum:
3663  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3664  Depth + 1) &&
3665  cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
3666  Depth + 1);
3667  case Intrinsic::exp:
3668  case Intrinsic::exp2:
3669  case Intrinsic::fabs:
3670  return true;
3671 
3672  case Intrinsic::sqrt:
3673  // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0.
3674  if (!SignBitOnly)
3675  return true;
3676  return CI->hasNoNaNs() && (CI->hasNoSignedZeros() ||
3677  CannotBeNegativeZero(CI->getOperand(0), TLI));
3678 
3679  case Intrinsic::powi:
3680  if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) {
3681  // powi(x,n) is non-negative if n is even.
3682  if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0)
3683  return true;
3684  }
3685  // TODO: This is not correct. Given that exp is an integer, here are the
3686  // ways that pow can return a negative value:
3687  //
3688  // pow(x, exp) --> negative if exp is odd and x is negative.
3689  // pow(-0, exp) --> -inf if exp is negative odd.
3690  // pow(-0, exp) --> -0 if exp is positive odd.
3691  // pow(-inf, exp) --> -0 if exp is negative odd.
3692  // pow(-inf, exp) --> -inf if exp is positive odd.
3693  //
3694  // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN,
3695  // but we must return false if x == -0. Unfortunately we do not currently
3696  // have a way of expressing this constraint. See details in
3697  // https://llvm.org/bugs/show_bug.cgi?id=31702.
3698  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3699  Depth + 1);
3700 
3701  case Intrinsic::fma:
3702  case Intrinsic::fmuladd:
3703  // x*x+y is non-negative if y is non-negative.
3704  return I->getOperand(0) == I->getOperand(1) &&
3705  (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
3706  cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
3707  Depth + 1);
3708  }
3709  break;
3710  }
3711  return false;
3712 }
3713 
3714 bool llvm::CannotBeOrderedLessThanZero(const Value *V,
3715  const TargetLibraryInfo *TLI) {
3716  return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
3717 }
3718 
3719 bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
3720  return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
3721 }
3722 
3723 bool llvm::isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI,
3724  unsigned Depth) {
3725  assert(V->getType()->isFPOrFPVectorTy() && "Querying for Inf on non-FP type");
3726 
3727  // If we're told that infinities won't happen, assume they won't.
3728  if (auto *FPMathOp = dyn_cast<FPMathOperator>(V))
3729  if (FPMathOp->hasNoInfs())
3730  return true;
3731 
3732  // Handle scalar constants.
3733  if (auto *CFP = dyn_cast<ConstantFP>(V))
3734  return !CFP->isInfinity();
3735 
3736  if (Depth == MaxAnalysisRecursionDepth)
3737  return false;
3738 
3739  if (auto *Inst = dyn_cast<Instruction>(V)) {
3740  switch (Inst->getOpcode()) {
3741  case Instruction::Select: {
3742  return isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1) &&
3743  isKnownNeverInfinity(Inst->getOperand(2), TLI, Depth + 1);
3744  }
3745  case Instruction::SIToFP:
3746  case Instruction::UIToFP: {
3747  // Get width of largest magnitude integer (remove a bit if signed).
3748  // This still works for a signed minimum value because the largest FP
3749  // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
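 // Worked example (illustrative): for `sitofp i32 %x to float`, IntSize = 31
 // and ilogb of the largest finite float is 127 >= 31, so the result is
 // always finite. For `uitofp i128 %y to half`, IntSize = 128 but
 // ilogb(65504.0) = 15 < 128, so this case returns false.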
3750  int IntSize = Inst->getOperand(0)->getType()->getScalarSizeInBits();
3751  if (Inst->getOpcode() == Instruction::SIToFP)
3752  --IntSize;
3753 
3754  // If the exponent of the largest finite FP value can hold the largest
3755  // integer, the result of the cast must be finite.
3756  Type *FPTy = Inst->getType()->getScalarType();
3757  return ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize;
3758  }
3759  default:
3760  break;
3761  }
3762  }
3763 
3764  // Try to handle fixed width vector constants.
3765  auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
3766  if (VFVTy && isa<Constant>(V)) {
3767  // For vectors, verify that each element is not infinity.
3768  unsigned NumElts = VFVTy->getNumElements();
3769  for (unsigned i = 0; i != NumElts; ++i) {
3770  Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
3771  if (!Elt)
3772  return false;
3773  if (isa<UndefValue>(Elt))
3774  continue;
3775  auto *CElt = dyn_cast<ConstantFP>(Elt);
3776  if (!CElt || CElt->isInfinity())
3777  return false;
3778  }
3779  // All elements were confirmed non-infinity or undefined.
3780  return true;
3781  }
3782 
3783  // Was not able to prove that V never contains infinity.
3784  return false;
3785 }
3786 
3787 bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
3788  unsigned Depth) {
3789  assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type");
3790 
3791  // If we're told that NaNs won't happen, assume they won't.
3792  if (auto *FPMathOp = dyn_cast<FPMathOperator>(V))
3793  if (FPMathOp->hasNoNaNs())
3794  return true;
3795 
3796  // Handle scalar constants.
3797  if (auto *CFP = dyn_cast<ConstantFP>(V))
3798  return !CFP->isNaN();
3799 
3800  if (Depth == MaxAnalysisRecursionDepth)
3801  return false;
3802 
3803  if (auto *Inst = dyn_cast<Instruction>(V)) {
3804  switch (Inst->getOpcode()) {
3805  case Instruction::FAdd:
3806  case Instruction::FSub:
3807  // Adding positive and negative infinity produces NaN.
3808  return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) &&
3809  isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
3810  (isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) ||
3811  isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1));
3812 
3813  case Instruction::FMul:
3814  // Zero multiplied with infinity produces NaN.
3815  // FIXME: If neither side can be zero fmul never produces NaN.
3816  return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) &&
3817  isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) &&
3818  isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
3819  isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1);
3820 
3821  case Instruction::FDiv:
3822  case Instruction::FRem:
3823  // FIXME: Only 0/0, Inf/Inf, Inf REM x and x REM 0 produce NaN.
3824  return false;
3825 
3826  case Instruction::Select: {
3827  return isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
3828  isKnownNeverNaN(Inst->getOperand(2), TLI, Depth + 1);
3829  }
3830  case Instruction::SIToFP:
3831  case Instruction::UIToFP:
3832  return true;
3833  case Instruction::FPTrunc:
3834  case Instruction::FPExt:
3835  return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1);
3836  default:
3837  break;
3838  }
3839  }
3840 
3841  if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
3842  switch (II->getIntrinsicID()) {
3843  case Intrinsic::canonicalize:
3844  case Intrinsic::fabs:
3845  case Intrinsic::copysign:
3846  case Intrinsic::exp:
3847  case Intrinsic::exp2:
3848  case Intrinsic::floor:
3849  case Intrinsic::ceil:
3850  case Intrinsic::trunc:
3851  case Intrinsic::rint:
3852  case Intrinsic::nearbyint:
3853  case Intrinsic::round:
3854  case Intrinsic::roundeven:
3855  return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1);
3856  case Intrinsic::sqrt:
3857  return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) &&
3858  CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI);
3859  case Intrinsic::minnum:
3860  case Intrinsic::maxnum:
3861  // If either operand is not NaN, the result is not NaN.
3862  return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) ||
3863  isKnownNeverNaN(II->getArgOperand(1), TLI, Depth + 1);
3864  default:
3865  return false;
3866  }
3867  }
3868 
3869  // Try to handle fixed width vector constants
3870  auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
3871  if (VFVTy && isa<Constant>(V)) {
3872  // For vectors, verify that each element is not NaN.
3873  unsigned NumElts = VFVTy->getNumElements();
3874  for (unsigned i = 0; i != NumElts; ++i) {
3875  Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
3876  if (!Elt)
3877  return false;
3878  if (isa<UndefValue>(Elt))
3879  continue;
3880  auto *CElt = dyn_cast<ConstantFP>(Elt);
3881  if (!CElt || CElt->isNaN())
3882  return false;
3883  }
3884  // All elements were confirmed not-NaN or undefined.
3885  return true;
3886  }
3887 
3888  // Was not able to prove that V never contains NaN
3889  return false;
3890 }
3891 
3892 Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
3893 
3894  // All byte-wide stores are splatable, even of arbitrary variables.
3895  if (V->getType()->isIntegerTy(8))
3896  return V;
3897 
3898  LLVMContext &Ctx = V->getContext();
3899 
3900  // Undef doesn't care; it can stand for any byte.
3901  auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
3902  if (isa<UndefValue>(V))
3903  return UndefInt8;
3904 
3905  // Return Undef for zero-sized type.
3906  if (!DL.getTypeStoreSize(V->getType()).isNonZero())
3907  return UndefInt8;
3908 
3909  Constant *C = dyn_cast<Constant>(V);
3910  if (!C) {
3911  // Conceptually, we could handle things like:
3912  // %a = zext i8 %X to i16
3913  // %b = shl i16 %a, 8
3914  // %c = or i16 %a, %b
3915  // but until there is an example that actually needs this, it doesn't seem
3916  // worth worrying about.
3917  return nullptr;
3918  }
3919 
3920  // Handle 'null' ConstantAggregateZero etc.
3921  if (C->isNullValue())
3922  return Constant::getNullValue(Type::getInt8Ty(Ctx));
3923 
3924  // Constant floating-point values can be handled as integer values if the
3925  // corresponding integer value is "byteable". An important case is 0.0.
3926  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
3927  Type *Ty = nullptr;
3928  if (CFP->getType()->isHalfTy())
3929  Ty = Type::getInt16Ty(Ctx);
3930  else if (CFP->getType()->isFloatTy())
3931  Ty = Type::getInt32Ty(Ctx);
3932  else if (CFP->getType()->isDoubleTy())
3933  Ty = Type::getInt64Ty(Ctx);
3934  // Don't handle long double formats, which have strange constraints.
3935  return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)
3936  : nullptr;
3937  }
3938 
3939  // We can handle constant integers that are multiple of 8 bits.
3940  if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
3941  if (CI->getBitWidth() % 8 == 0) {
3942  assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
3943  if (!CI->getValue().isSplat(8))
3944  return nullptr;
3945  return ConstantInt::get(Ctx, CI->getValue().trunc(8));
3946  }
3947  }
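  // Illustrative examples (made up): i32 0xAAAAAAAA is a splat of the byte
  // 0xAA and yields `i8 -86`, while i32 0x01020304 is not byte-splatted and
  // produces nullptr.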
3948 
3949  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
3950  if (CE->getOpcode() == Instruction::IntToPtr) {
3951  if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) {
3952  unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace());
3953  return isBytewiseValue(
3954  ConstantExpr::getIntegerCast(CE->getOperand(0),
3955  Type::getIntNTy(Ctx, BitWidth), false),
3956  DL);
3957  }
3958  }
3959  }
3960 
3961  auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
3962  if (LHS == RHS)
3963  return LHS;
3964  if (!LHS || !RHS)
3965  return nullptr;
3966  if (LHS == UndefInt8)
3967  return RHS;
3968  if (RHS == UndefInt8)
3969  return LHS;
3970  return nullptr;
3971  };
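  // The Merge lambda acts as a small lattice join (illustrative):
  //   Merge(0xAA, UndefInt8) == 0xAA, Merge(UndefInt8, 0xAA) == 0xAA,
  //   Merge(0xAA, 0xAA) == 0xAA, and Merge(0xAA, 0xBB) == nullptr.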
3972 
3973  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
3974  Value *Val = UndefInt8;
3975  for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
3976  if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL))))
3977  return nullptr;
3978  return Val;
3979  }
3980 
3981  if (isa<ConstantAggregate>(C)) {
3982  Value *Val = UndefInt8;
3983  for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
3984  if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL))))
3985  return nullptr;
3986  return Val;
3987  }
3988 
3989  // Don't try to handle the handful of other constants.
3990  return nullptr;
3991 }
3992 
3993 // This is the recursive version of BuildSubAggregate. It takes a few different
3994 // arguments. Idxs is the index within the nested struct From that we are
3995 // looking at now (which is of type IndexedType). IdxSkip is the number of
3996 // indices from Idxs that should be left out when inserting into the resulting
3997 // struct. To is the result struct built so far, new insertvalue instructions
3998 // build on that.
3999 static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
4000  SmallVectorImpl<unsigned> &Idxs,
4001  unsigned IdxSkip,
4002  Instruction *InsertBefore) {
4003  StructType *STy = dyn_cast<StructType>(IndexedType);
4004  if (STy) {
4005  // Save the original To argument so we can modify it
4006  Value *OrigTo = To;
4007  // General case, the type indexed by Idxs is a struct
4008  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4009  // Process each struct element recursively
4010  Idxs.push_back(i);
4011  Value *PrevTo = To;
4012  To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
4013  InsertBefore);
4014  Idxs.pop_back();
4015  if (!To) {
4016  // Couldn't find any inserted value for this index? Cleanup
4017  while (PrevTo != OrigTo) {
4018  InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
4019  PrevTo = Del->getAggregateOperand();
4020  Del->eraseFromParent();
4021  }
4022  // Stop processing elements
4023  break;
4024  }
4025  }
4026  // If we successfully found a value for each of our subaggregates
4027  if (To)
4028  return To;
4029  }
4030  // Base case, the type indexed by SourceIdxs is not a struct, or not all of
4031  // the struct's elements had a value that was inserted directly. In the latter
4032  // case, perhaps we can't determine each of the subelements individually, but
4033  // we might be able to find the complete struct somewhere.
4034 
4035  // Find the value that is at that particular spot
4036  Value *V = FindInsertedValue(From, Idxs);
4037 
4038  if (!V)
4039  return nullptr;
4040 
4041  // Insert the value in the new (sub) aggregate
4042  return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
4043  "tmp", InsertBefore);
4044 }
4045 
4046 // This helper takes a nested struct and extracts a part of it (which is again a
4047 // struct) into a new value. For example, given the struct:
4048 // { a, { b, { c, d }, e } }
4049 // and the indices "1, 1" this returns
4050 // { c, d }.
4051 //
4052 // It does this by inserting an insertvalue for each element in the resulting
4053 // struct, as opposed to just inserting a single struct. This will only work if
4054 // each of the elements of the substruct is known (i.e., inserted into From by an
4055 // insertvalue instruction somewhere).
4056 //
4057 // All inserted insertvalue instructions are inserted before InsertBefore
4058 static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
4059  Instruction *InsertBefore) {
4060  assert(InsertBefore && "Must have someplace to insert!");
4061  Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
4062  idx_range);
4063  Value *To = UndefValue::get(IndexedType);
4064  SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
4065  unsigned IdxSkip = Idxs.size();
4066 
4067  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
4068 }
4069 
4070 /// Given an aggregate and a sequence of indices, see if the scalar value
4071 /// indexed is already around as a register, for example if it was inserted
4072 /// directly into the aggregate.
4073 ///
4074 /// If InsertBefore is not null, this function will duplicate (modified)
4075 /// insertvalues when a part of a nested struct is extracted.
4076 Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
4077  Instruction *InsertBefore) {
4078  // Nothing to index? Just return V then (this is useful at the end of our
4079  // recursion).
4080  if (idx_range.empty())
4081  return V;
4082  // We have indices, so V should have an indexable type.
4083  assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
4084  "Not looking at a struct or array?");
4085  assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
4086  "Invalid indices for type?");
4087 
4088  if (Constant *C = dyn_cast<Constant>(V)) {
4089  C = C->getAggregateElement(idx_range[0]);
4090  if (!C) return nullptr;
4091  return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
4092  }
4093 
4094  if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
4095  // Loop the indices for the insertvalue instruction in parallel with the
4096  // requested indices
4097  const unsigned *req_idx = idx_range.begin();
4098  for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
4099  i != e; ++i, ++req_idx) {
4100  if (req_idx == idx_range.end()) {
4101  // We can't handle this without inserting insertvalues
4102  if (!InsertBefore)
4103  return nullptr;
4104 
4105  // The requested index identifies a part of a nested aggregate. Handle
4106  // this specially. For example,
4107  // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
4108  // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
4109  // %C = extractvalue {i32, { i32, i32 } } %B, 1
4110  // This can be changed into
4111  // %A = insertvalue {i32, i32 } undef, i32 10, 0
4112  // %C = insertvalue {i32, i32 } %A, i32 11, 1
4113  // which allows the unused 0,0 element from the nested struct to be
4114  // removed.
4115  return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
4116  InsertBefore);
4117  }
4118 
4119  // This insertvalue inserts something other than what we are looking for.
4120  // See if the (aggregate) value inserted into has the value we are
4121  // looking for, then.
4122  if (*req_idx != *i)
4123  return FindInsertedValue(I->getAggregateOperand(), idx_range,
4124  InsertBefore);
4125  }
4126  // If we end up here, the indices of the insertvalue match with those
4127  // requested (though possibly only partially). Now we recursively look at
4128  // the inserted value, passing any remaining indices.
4129  return FindInsertedValue(I->getInsertedValueOperand(),
4130  makeArrayRef(req_idx, idx_range.end()),
4131  InsertBefore);
4132  }
4133 
4134  if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
4135  // If we're extracting a value from an aggregate that was extracted from
4136  // something else, we can extract from that something else directly instead.
4137  // However, we will need to chain I's indices with the requested indices.
4138 
4139  // Calculate the number of indices required
4140  unsigned size = I->getNumIndices() + idx_range.size();
4141  // Allocate some space to put the new indices in
4142  SmallVector<unsigned, 5> Idxs;
4143  Idxs.reserve(size);
4144  // Add indices from the extract value instruction
4145  Idxs.append(I->idx_begin(), I->idx_end());
4146 
4147  // Add requested indices
4148  Idxs.append(idx_range.begin(), idx_range.end());
4149 
4150  assert(Idxs.size() == size
4151  && "Number of indices added not correct?");
4152 
4153  return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
4154  }
4155  // Otherwise, we don't know (such as, extracting from a function return value
4156  // or load instruction)
4157  return nullptr;
4158 }
4159 
4160 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
4161  unsigned CharSize) {
4162  // Make sure the GEP has exactly three arguments.
4163  if (GEP->getNumOperands() != 3)
4164  return false;
4165 
4166  // Make sure the index-ee is a pointer to an array of \p CharSize-bit
4167  // integers.
4168  ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
4169  if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
4170  return false;
4171 
4172  // Check to make sure that the first operand of the GEP is an integer and
4173  // has value 0 so that we are sure we're indexing into the initializer.
4174  const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
4175  if (!FirstIdx || !FirstIdx->isZero())
4176  return false;
4177 
4178  return true;
4179 }
4180 
4181 // If V refers to an initialized global constant, set Slice either to
4182 // its initializer if the size of its elements equals ElementSize, or,
4183 // for ElementSize == 8, to its representation as an array of unsigned
4184 // char. Return true on success.
4185 bool llvm::getConstantDataArrayInfo(const Value *V,
4186  ConstantDataArraySlice &Slice,
4187  unsigned ElementSize, uint64_t Offset) {
4188  assert(V);
4189 
4190  // Look through bitcast instructions and geps.
4191  V = V->stripPointerCasts();
4192 
4193  // If the value is a GEP instruction or constant expression, treat it as an
4194  // offset.
4195  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
4196  // Fail if the first GEP operand is not a constant zero and we're
4197  // not indexing into the initializer.
4198  const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
4199  if (!FirstIdx || !FirstIdx->isZero())
4200  return false;
4201 
4202  Value *Op0 = GEP->getOperand(0);
4203  const GlobalVariable *GV = dyn_cast<GlobalVariable>(Op0);
4204  if (!GV)
4205  return false;
4206 
4207  // Fail if the offset into the initializer is not constant.
4208  const DataLayout &DL = GV->getParent()->getDataLayout();
4209  APInt Off(DL.getIndexSizeInBits(GEP->getPointerAddressSpace()), 0);
4210  if (!GEP->accumulateConstantOffset(DL, Off))
4211  return false;
4212 
4213  // Fail if the constant offset is excessive.
4214  uint64_t StartIdx = Off.getLimitedValue();
4215  if (StartIdx == UINT64_MAX)
4216  return false;
4217 
4218  return getConstantDataArrayInfo(Op0, Slice, ElementSize, StartIdx + Offset);
4219  }
4220 
4221  // The GEP instruction, constant or instruction, must reference a global
4222  // variable that is a constant and is initialized. The referenced constant
4223  // initializer is the array that we'll use for optimization.
4224  const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
4225  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
4226  return false;
4227 
4228  const DataLayout &DL = GV->getParent()->getDataLayout();
4229  ConstantDataArray *Array = nullptr;
4230  ArrayType *ArrayTy = nullptr;
4231 
4232  if (GV->getInitializer()->isNullValue()) {
4233  Type *GVTy = GV->getValueType();
4234  uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedSize();
4235  uint64_t Length = SizeInBytes / (ElementSize / 8);
4236  if (Length <= Offset)
4237  // Bail on undersized constants to let sanitizers detect library
4238  // calls with them as arguments.
4239  return false;
4240 
4241  Slice.Array = nullptr;
4242  Slice.Offset = 0;
4243  Slice.Length = Length - Offset;
4244  return true;
4245  }
4246 
4247  auto *Init = const_cast<Constant *>(GV->getInitializer());
4248  if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) {
4249  Type *InitElTy = ArrayInit->getElementType();
4250  if (InitElTy->isIntegerTy(ElementSize)) {
4251  // If Init is an initializer for an array of the expected type
4252  // and size, use it as is.
4253  Array = ArrayInit;
4254  ArrayTy = ArrayInit->getType();
4255  }
4256  }
4257 
4258  if (!Array) {
4259  if (ElementSize != 8)
4260  // TODO: Handle conversions to larger integral types.
4261  return false;
4262 
4263  // Otherwise extract the portion of the initializer starting
4264  // at Offset as an array of bytes, and reset Offset.
4265  Init = ReadByteArrayFromGlobal(GV, Offset);
4266  if (!Init)
4267  return false;
4268 
4269  Offset = 0;
4270  Array = dyn_cast<ConstantDataArray>(Init);
4271  ArrayTy = dyn_cast<ArrayType>(Init->getType());
4272  }
4273 
4274  uint64_t NumElts = ArrayTy->getArrayNumElements();
4275  if (Offset > NumElts)
4276  return false;
4277 
4278  Slice.Array = Array;
4279  Slice.Offset = Offset;
4280  Slice.Length = NumElts - Offset;
4281  return true;
4282 }
4283 
4284 /// This function computes the length of a null-terminated C string pointed to
4285 /// by V. If successful, it returns true and returns the string in Str.
4286 /// If unsuccessful, it returns false.
4287 bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
4288  uint64_t Offset, bool TrimAtNul) {
4289  ConstantDataArraySlice Slice;
4290  if (!getConstantDataArrayInfo(V, Slice, 8, Offset))
4291  return false;
4292 
4293  if (Slice.Array == nullptr) {
4294  if (TrimAtNul) {
4295  Str = StringRef();
4296  return true;
4297  }
4298  if (Slice.Length == 1) {
4299  Str = StringRef("", 1);
4300  return true;
4301  }
4302  // We cannot instantiate a StringRef as we do not have an appropriate string
4303  // of 0s at hand.
4304  return false;
4305  }
4306 
4307  // Start out with the entire array in the StringRef.
4308  Str = Slice.Array->getAsString();
4309  // Skip over 'offset' bytes.
4310  Str = Str.substr(Slice.Offset);
4311 
4312  if (TrimAtNul) {
4313  // Trim off the \0 and anything after it. If the array is not nul
4314  // terminated, we just return the whole end of string. The client may know
4315  // some other way that the string is length-bound.
4316  Str = Str.substr(0, Str.find('\0'));
4317  }
4318  return true;
4319 }
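// Illustrative use (hypothetical IR): given
//   @s = private unnamed_addr constant [6 x i8] c"hello\00"
// calling getConstantStringInfo on a GEP to element 1 of @s sets Str to
// "ello" (the trailing nul is trimmed when TrimAtNul is true).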
4320 
4321 // These next two are very similar to the above, but also look through PHI
4322 // nodes.
4323 // TODO: See if we can integrate these two together.
4324 
4325 /// If we can compute the length of the string pointed to by
4326 /// the specified pointer, return 'len+1'. If we can't, return 0.
4327 static uint64_t GetStringLengthH(const Value *V,
4328  SmallPtrSetImpl<const PHINode*> &PHIs,
4329  unsigned CharSize) {
4330  // Look through noop bitcast instructions.
4331  V = V->stripPointerCasts();
4332 
4333  // If this is a PHI node, there are two cases: either we have already seen it
4334  // or we haven't.
4335  if (const PHINode *PN = dyn_cast<PHINode>(V)) {
4336  if (!PHIs.insert(PN).second)
4337  return ~0ULL; // already in the set.
4338 
4339  // If it was new, see if all the input strings are the same length.
4340  uint64_t LenSoFar = ~0ULL;
4341  for (Value *IncValue : PN->incoming_values()) {
4342  uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
4343  if (Len == 0) return 0; // Unknown length -> unknown.
4344 
4345  if (Len == ~0ULL) continue;
4346 
4347  if (Len != LenSoFar && LenSoFar != ~0ULL)
4348  return 0; // Disagree -> unknown.
4349  LenSoFar = Len;
4350  }
4351 
4352  // Success, all agree.
4353  return LenSoFar;
4354  }
4355 
4356  // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
4357  if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
4358  uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
4359  if (Len1 == 0) return 0;
4360  uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
4361  if (Len2 == 0) return 0;
4362  if (Len1 == ~0ULL) return Len2;
4363  if (Len2 == ~0ULL) return Len1;
4364  if (Len1 != Len2) return 0;
4365  return Len1;
4366  }
4367 
4368  // Otherwise, see if we can read the string.
4369  ConstantDataArraySlice Slice;
4370  if (!getConstantDataArrayInfo(V, Slice, CharSize))
4371  return 0;
4372 
4373  if (Slice.Array == nullptr)
4374  return 1;
4375 
4376  // Search for nul characters
4377  unsigned NullIndex = 0;
4378  for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
4379  if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
4380  break;
4381  }
4382 
4383  return NullIndex + 1;
4384 }
4385 
4386 /// If we can compute the length of the string pointed to by
4387 /// the specified pointer, return 'len+1'. If we can't, return 0.
4388 uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
4389  if (!V->getType()->isPointerTy())
4390  return 0;
4391 
4392  SmallPtrSet<const PHINode*, 32> PHIs;
4393  uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
4394  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
4395  // the length of an empty string (1) as the result.
4396  return Len == ~0ULL ? 1 : Len;
4397 }
4398 
4399 const Value *
4400 llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
4401  bool MustPreserveNullness) {
4402  assert(Call &&
4403  "getArgumentAliasingToReturnedPointer only works on nonnull calls");
4404  if (const Value *RV = Call->getReturnedArgOperand())
4405  return RV;
4406  // This can be used only as an aliasing property.
4407  if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
4408  Call, MustPreserveNullness))
4409  return Call->getArgOperand(0);
4410  return nullptr;
4411 }
4412 
4413 bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
4414  const CallBase *Call, bool MustPreserveNullness) {
4415  switch (Call->getIntrinsicID()) {
4416  case Intrinsic::launder_invariant_group:
4417  case Intrinsic::strip_invariant_group:
4418  case Intrinsic::aarch64_irg:
4419  case Intrinsic::aarch64_tagp:
4420  return true;
4421  case Intrinsic::ptrmask:
4422  return !MustPreserveNullness;
4423  default:
4424  return false;
4425  }
4426 }
4427 
4428 /// \p PN defines a loop-variant pointer to an object. Check if the
4429 /// previous iteration of the loop was referring to the same object as \p PN.
4430 static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
4431  const LoopInfo *LI) {
4432  // Find the loop-defined value.
4433  Loop *L = LI->getLoopFor(PN->getParent());
4434  if (PN->getNumIncomingValues() != 2)
4435  return true;
4436 
4437  // Find the value from previous iteration.
4438  auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
4439  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
4440  PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
4441  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
4442  return true;
4443 
4444  // If a new pointer is loaded in the loop, the pointer references a different
4445  // object in every iteration. E.g.:
4446  // for (i)
4447  // int *p = a[i];
4448  // ...
4449  if (auto *Load = dyn_cast<LoadInst>(PrevValue))
4450  if (!L->isLoopInvariant(Load->getPointerOperand()))
4451  return false;
4452  return true;
4453 }
4454 
4455 const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) {
4456  if (!V->getType()->isPointerTy())
4457  return V;
4458  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
4459  if (auto *GEP = dyn_cast<GEPOperator>(V)) {
4460  V = GEP->getPointerOperand();
4461  } else if (Operator::getOpcode(V) == Instruction::BitCast ||
4462  Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
4463  V = cast<Operator>(V)->getOperand(0);
4464  if (!V->getType()->isPointerTy())
4465  return V;
4466  } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
4467  if (GA->isInterposable())
4468  return V;
4469  V = GA->getAliasee();
4470  } else {
4471  if (auto *PHI = dyn_cast<PHINode>(V)) {
4472  // Look through single-arg phi nodes created by LCSSA.
4473  if (PHI->getNumIncomingValues() == 1) {
4474  V = PHI->getIncomingValue(0);
4475  continue;
4476  }
4477  } else if (auto *Call = dyn_cast<CallBase>(V)) {
4478  // CaptureTracking can know about special capturing properties of some
4479  // intrinsics like launder.invariant.group, that can't be expressed with
4480  // the attributes, but have properties like returning aliasing pointer.
4481  // Because some analyses may assume that a nocapture pointer is not
4482  // returned from a special intrinsic (the function would otherwise have to
4483  // be marked with the 'returned' attribute), it is crucial to use this
4484  // function so that we stay in sync with CaptureTracking. Not using it may
4485  // cause weird miscompilations where two aliasing pointers are assumed to
4486  // be noalias.
4487  if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
4488  V = RP;
4489  continue;
4490  }
4491  }
4492 
4493  return V;
4494  }
4495  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
4496  }
4497  return V;
4498 }
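// Illustrative behaviour (made-up IR): for
//   %p = getelementptr inbounds i8, i8* %base, i64 16
//   %q = bitcast i8* %p to i32*
// getUnderlyingObject(%q) walks back through the bitcast and the GEP and
// returns %base (or whatever alloca/global it ultimately strips back to).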
4499 
4500 void llvm::getUnderlyingObjects(const Value *V,
4501  SmallVectorImpl<const Value *> &Objects,
4502  LoopInfo *LI, unsigned MaxLookup) {
4503  SmallPtrSet<const Value *, 4> Visited;
4504  SmallVector<const Value *, 4> Worklist;
4505  Worklist.push_back(V);
4506  do {
4507  const Value *P = Worklist.pop_back_val();
4508  P = getUnderlyingObject(P, MaxLookup);
4509 
4510  if (!Visited.insert(P).second)
4511  continue;
4512 
4513  if (auto *SI = dyn_cast<SelectInst>(P)) {
4514  Worklist.push_back(SI->getTrueValue());
4515  Worklist.push_back(SI->getFalseValue());
4516  continue;
4517  }
4518 
4519  if (auto *PN = dyn_cast<PHINode>(P)) {
4520  // If this PHI changes the underlying object in every iteration of the
4521  // loop, don't look through it. Consider:
4522  // int **A;
4523  // for (i) {
4524  // Prev = Curr; // Prev = PHI (Prev_0, Curr)
4525  // Curr = A[i];
4526  // *Prev, *Curr;
4527  //
4528  // Prev is tracking Curr one iteration behind so they refer to different
4529  // underlying objects.
4530  if (!LI || !LI->isLoopHeader(PN->getParent()) ||
4531  isSameUnderlyingObjectInLoop(PN, LI))
4532  append_range(Worklist, PN->incoming_values());
4533  continue;
4534  }
4535 
4536  Objects.push_back(P);
4537  } while (!Worklist.empty());
4538 }
4539 
4540 /// This is the function that does the work of looking through basic
4541 /// ptrtoint+arithmetic+inttoptr sequences.
4542 static const Value *getUnderlyingObjectFromInt(const Value *V) {
4543  do {
4544  if (const Operator *U = dyn_cast<Operator>(V)) {
4545  // If we find a ptrtoint, we can transfer control back to the
4546  // regular getUnderlyingObjectFromInt.
4547  if (U->getOpcode() == Instruction::PtrToInt)
4548  return U->getOperand(0);
4549  // If we find an add of a constant, a multiplied value, or a phi, it's
4550  // likely that the other operand will lead us to the base
4551  // object. We don't have to worry about the case where the
4552  // object address is somehow being computed by the multiply,
4553  // because our callers only care when the result is an
4554  // identifiable object.
4555  if (U->getOpcode() != Instruction::Add ||
4556  (!isa<ConstantInt>(U->getOperand(1)) &&
4557  Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
4558  !isa<PHINode>(U->getOperand(1))))
4559  return V;
4560  V = U->getOperand(0);
4561  } else {
4562  return V;
4563  }
4564  assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
4565  } while (true);
4566 }
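// Illustrative sketch, not part of ValueTracking.cpp: the kind of sequence the
// integer walk (used by getUnderlyingObjectsForCodeGen below) can see through.
// The value names are hypothetical.
//
//   %int = ptrtoint i8* %base to i64
//   %off = mul i64 %idx, 16
//   %sum = add i64 %int, %off
//   %ptr = inttoptr i64 %sum to i8*
//
// The pointer walk stops at the inttoptr; the integer walk then skips the add
// (its second operand is a mul), hits the ptrtoint, and hands %base back to
// the pointer walk.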
4567 
4568 /// This is a wrapper around getUnderlyingObjects and adds support for basic
4569 /// ptrtoint+arithmetic+inttoptr sequences.
4570 /// It returns false if an unidentified object is found in getUnderlyingObjects.
4571 bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
4572  SmallVectorImpl<Value *> &Objects) {
4573  SmallPtrSet<const Value *, 4> Visited;
4574  SmallVector<const Value *, 4> Working(1, V);
4575  do {
4576  V = Working.pop_back_val();
4577 
4578  SmallVector<const Value *, 4> Objs;
4579  getUnderlyingObjects(V, Objs);
4580 
4581  for (const Value *V : Objs) {
4582  if (!Visited.insert(V).second)
4583  continue;
4584  if (Operator::getOpcode(V) == Instruction::IntToPtr) {
4585  const Value *O =
4586  getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
4587  if (O->getType()->isPointerTy()) {
4588  Working.push_back(O);
4589  continue;
4590  }
4591  }
4592  // If getUnderlyingObjects fails to find an identifiable object,
4593  // getUnderlyingObjectsForCodeGen also fails for safety.
4594  if (!isIdentifiedObject(V)) {
4595  Objects.clear();
4596  return false;
4597  }
4598  Objects.push_back(const_cast<Value *>(V));
4599  }
4600  } while (!Working.empty());
4601  return true;
4602 }
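// Illustrative sketch, not part of ValueTracking.cpp: a codegen-level client of
// the wrapper. The helper name allBasesIdentified is hypothetical.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ValueTracking.h"

static bool allBasesIdentified(const llvm::Value *Ptr,
                               llvm::SmallVectorImpl<llvm::Value *> &Bases) {
  // Returns false (and leaves Bases empty) as soon as any underlying object is
  // not an identified object such as an alloca, a global, or a noalias
  // argument.
  return llvm::getUnderlyingObjectsForCodeGen(Ptr, Bases);
}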
4603 
4604 AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) {
4605  AllocaInst *Result = nullptr;
4606  SmallPtrSet<Value *, 4> Visited;
4607  SmallVector<Value *, 4> Worklist;
4608 
4609  auto AddWork = [&](Value *V) {
4610  if (Visited.insert(V).second)
4611  Worklist.push_back(V);
4612  };
4613 
4614  AddWork(V);
4615  do {
4616  V = Worklist.pop_back_val();
4617  assert(Visited.count(V));
4618 
4619  if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
4620  if (Result && Result != AI)
4621  return nullptr;
4622  Result = AI;
4623  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
4624  AddWork(CI->getOperand(0));
4625  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
4626  for (Value *IncValue : PN->incoming_values())
4627  AddWork(IncValue);
4628  } else if (auto *SI = dyn_cast<SelectInst>(V)) {
4629  AddWork(SI->getTrueValue());
4630  AddWork(SI->getFalseValue());
4631  } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
4632  if (OffsetZero && !GEP->hasAllZeroIndices())
4633  return nullptr;
4634  AddWork(GEP->getPointerOperand());
4635  } else if (CallBase *CB = dyn_cast<CallBase>(V)) {
4636  Value *Returned = CB->getReturnedArgOperand();
4637  if (Returned)
4638  AddWork(Returned);
4639  else
4640  return nullptr;
4641  } else {
4642  return nullptr;
4643  }
4644  } while (!Worklist.empty());
4645 
4646  return Result;
4647 }
4648 
4649 static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
4650  const Value *V, bool AllowLifetime, bool AllowDroppable) {
4651  for (const User *U : V->users()) {
4652  const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
4653  if (!II)
4654  return false;
4655 
4656  if (AllowLifetime && II->isLifetimeStartOrEnd())
4657  continue;
4658 
4659  if (AllowDroppable && II->isDroppable())
4660  continue;
4661 
4662  return false;
4663  }
4664  return true;
4665 }
4666 
4667 bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
4668  return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
4669  V, /* AllowLifetime */ true, /* AllowDroppable */ false);
4670 }
4671 bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
4672  return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
4673  V, /* AllowLifetime */ true, /* AllowDroppable */ true);
4674 }
4675 
4676 bool llvm::mustSuppressSpeculation(const LoadInst &LI) {
4677  if (!LI.isUnordered())
4678  return true;
4679  const Function &F = *LI.getFunction();
4680  // Speculative load may create a race that did not exist in the source.
4681  return F.hasFnAttribute(Attribute::SanitizeThread) ||
4682  // Speculative load may load data from dirty regions.
4683  F.hasFnAttribute(Attribute::SanitizeAddress) ||
4684  F.hasFnAttribute(Attribute::SanitizeHWAddress);
4685 }
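// Illustrative sketch, not part of ValueTracking.cpp: a hoisting transform
// would typically pair this predicate with a dereferenceability check before
// moving a load out of a conditional block. The helper name canHoistLoad is
// hypothetical and deliberately simplified.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"

static bool canHoistLoad(const llvm::LoadInst &LI) {
  // Ordered/atomic loads, and any load inside a thread-, address- or
  // hwaddress-sanitized function, must stay where they are.
  return !llvm::mustSuppressSpeculation(LI);
}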
4686 
4687 
4688 bool llvm::isSafeToSpeculativelyExecute(const Value *V,
4689  const Instruction *CtxI,
4690  const DominatorTree *DT,
4691  const TargetLibraryInfo *TLI) {
4692  const Operator *Inst = dyn_cast<Operator>(V);
4693  if (!Inst)
4694  return false;
4695  return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, DT, TLI);
4696 }
4697 
4698 bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode,
4699  const Operator *Inst,
4700  const Instruction *CtxI,
4701  const DominatorTree *DT,
4702  const TargetLibraryInfo *TLI) {
4703 #ifndef NDEBUG
4704  if (Inst->getOpcode() != Opcode) {
4705  // Check that the operands are actually compatible with the Opcode override.
4706  auto hasEqualReturnAndLeadingOperandTypes =
4707  [](const Operator *Inst, unsigned NumLeadingOperands) {
4708  if (Inst->getNumOperands() < NumLeadingOperands)
4709  return false;
4710  const Type *ExpectedType = Inst->getType();
4711  for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
4712  if (Inst->getOperand(ItOp)->getType() != ExpectedType)
4713  return false;
4714  return true;
4715  };
4716  assert(!Instruction::isBinaryOp(Opcode) ||
4717  hasEqualReturnAndLeadingOperandTypes(Inst, 2));
4718  assert(!Instruction::isUnaryOp(Opcode) ||
4719  hasEqualReturnAndLeadingOperandTypes(Inst, 1));
4720  }
4721 #endif
4722 
4723  for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
4724  if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
4725  if (C->canTrap())
4726  return false;
4727 
4728  switch (Opcode) {
4729  default:
4730  return true;
4731  case Instruction::UDiv:
4732  case Instruction::URem: {
4733  // x / y is undefined if y == 0.
4734  const APInt *V;
4735  if (match(Inst->getOperand(1), m_APInt(V)))
4736  return *V != 0;
4737  return false;
4738  }
4739  case Instruction::SDiv:
4740  case Instruction::SRem: {
4741  // x / y is undefined if y == 0, or if x == INT_MIN and y == -1.
4742  const APInt *Numerator, *Denominator;
4743  if (!match(Inst->getOperand(1), m_APInt(Denominator)))
4744  return false;
4745  // We cannot hoist this division if the denominator is 0.
4746  if (*Denominator == 0)
4747  return false;
4748  // It's safe to hoist if the denominator is neither 0 nor -1.
4749  if (!Denominator->isAllOnes())
4750  return true;
4751  // At this point we know that the denominator is -1. It is safe to hoist as
4752  // long as we know that the numerator is not INT_MIN.
4753  if (match(Inst->getOperand(0), m_APInt(Numerator)))
4754  return !Numerator->isMinSignedValue();
4755  // The numerator *might* be MinSignedValue.
4756  return false;
4757  }
4758  case Instruction::Load: {
4759  const LoadInst *LI = dyn_cast<LoadInst>(Inst);
4760  if (!LI)
4761  return false;
4762  if (mustSuppressSpeculation(*LI))
4763  return false;
4764  const DataLayout &DL = LI->getModule()->getDataLayout();
4765  return isDereferenceableAndAlignedPointer(
4766  LI->getPointerOperand(), LI->getType(), LI->getAlign(), DL, CtxI, DT,
4767  TLI);
4768  }
4769  case Instruction::Call: {
4770  auto *CI = dyn_cast<const CallInst>(Inst);
4771  if (!CI)
4772  return false;
4773  const Function *Callee = CI->getCalledFunction();
4774 
4775  // The called function could have undefined behavior or side-effects, even
4776  // if marked readnone nounwind.
4777  return Callee && Callee->isSpeculatable();
4778  }
4779  case Instruction::VAArg:
4780  case Instruction::Alloca:
4781  case Instruction::Invoke:
4782  case Instruction::CallBr:
4783  case Instruction::PHI:
4784  case Instruction::Store:
4785  case Instruction::Ret:
4786  case Instruction::Br:
4787  case Instruction::IndirectBr:
4788  case Instruction::Switch:
4789  case Instruction::Unreachable:
4790  case Instruction::Fence:
4791  case Instruction::AtomicRMW:
4792  case Instruction::AtomicCmpXchg:
4793  case Instruction::LandingPad:
4794  case Instruction::Resume:
4795  case Instruction::CatchSwitch:
4796  case Instruction::CatchPad:
4797  case Instruction::CatchRet:
4798  case Instruction::CleanupPad:
4799  case Instruction::CleanupRet:
4800  return false; // Misc instructions which have effects
4801  }
4802 }
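// Illustrative sketch, not part of ValueTracking.cpp: the division rules above
// in IR terms, plus a minimal caller. The value names and canSpeculate are
// hypothetical.
//
//   %a = sdiv i32 %x, 7    ; hoistable: the denominator is neither 0 nor -1
//   %b = sdiv i32 %x, -1   ; not hoistable: %x might be INT_MIN
//   %c = udiv i32 %x, %y   ; not hoistable: %y might be 0
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instruction.h"

static bool canSpeculate(const llvm::Instruction &I,
                         const llvm::Instruction *InsertPt,
                         const llvm::DominatorTree *DT) {
  // The context instruction and dominator tree only matter for loads, where
  // dereferenceability at InsertPt is what makes the speculation safe.
  return llvm::isSafeToSpeculativelyExecute(&I, InsertPt, DT);
}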
4803 
4804 bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
4805  if (I.mayReadOrWriteMemory())
4806  // Memory dependency possible
4807  return true;
4808  if (!isSafeToSpeculativelyExecute(&I))
4809  // Can't move above a maythrow call or infinite loop. Or if an
4810  // inalloca alloca, above a stacksave call.
4811  return true;
4812  if (!isGuaranteedToTransferExecutionToSuccessor(&I))
4813  // 1) Can't reorder two inf-loop calls, even if readonly
4814  // 2) Also can't reorder an inf-loop call below an instruction which isn't
4815  // safe to speculatively execute. (Inverse of above)
4816  return true;
4817  return false;
4818 }
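// Illustrative sketch, not part of ValueTracking.cpp: a sinking or reordering
// transform could use this as a cheap bail-out. The helper name
// isTriviallyReorderable is hypothetical.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instruction.h"

static bool isTriviallyReorderable(const llvm::Instruction &I) {
  // True only when the instruction touches no memory, cannot trap, and is
  // guaranteed to transfer execution to its successor, i.e. only def-use
  // dependencies constrain where it may be placed.
  return !llvm::mayHaveNonDefUseDependency(I);
}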
4819 
4820 /// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
4821 static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
4822  switch (OR) {
4823  case ConstantRange::OverflowResult::MayOverflow:
4824  return OverflowResult::MayOverflow;
4825  case ConstantRange::OverflowResult::AlwaysOverflowsLow:
4826  return OverflowResult::AlwaysOverflowsLow;
4827  case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
4828  return OverflowResult::AlwaysOverflowsHigh;
4829  case ConstantRange::OverflowResult::NeverOverflows:
4830  return OverflowResult::NeverOverflows;
4831  }
4832  llvm_unreachable("Unknown OverflowResult");
4833 }
4834 
4835 /// Combine constant ranges from computeConstantRange() and computeKnownBits().
4836 static ConstantRange computeConstantRangeIncludingKnownBits(
4837  const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth,
4838  AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
4839  OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) {
4840  KnownBits Known = computeKnownBits(
4841  V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo);
4842  ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned);
4843  ConstantRange CR2 = computeConstantRange(V, UseInstrInfo);
4844  ConstantRange::PreferredRangeType RangeType =
4845  ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
4846  return CR1.intersectWith(CR2, RangeType);
4847 }
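// Illustrative sketch, not part of ValueTracking.cpp: the same combination
// expressed with the public ConstantRange/KnownBits APIs. The helper name
// intersectKnownBitsWithRange is hypothetical.
#include "llvm/IR/ConstantRange.h"
#include "llvm/Support/KnownBits.h"

static llvm::ConstantRange
intersectKnownBitsWithRange(const llvm::KnownBits &Known,
                            llvm::ConstantRange CR, bool ForSigned) {
  // Known bits give one range, value-range analysis another; the value lies
  // in their intersection, preferring the signed or unsigned view that the
  // caller cares about.
  llvm::ConstantRange FromBits =
      llvm::ConstantRange::fromKnownBits(Known, ForSigned);
  return FromBits.intersectWith(CR, ForSigned ? llvm::ConstantRange::Signed
                                              : llvm::ConstantRange::Unsigned);
}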
4848 
4849 OverflowResult llvm::computeOverflowForUnsignedMul(
4850  const Value *LHS, const Value *RHS, const DataLayout &DL,
4851  AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
4852  bool UseInstrInfo) {
4853  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
4854  nullptr, UseInstrInfo);
4855  KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
4856  nullptr, UseInstrInfo);
4857  ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
4858  ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
4859  return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
4860 }
4861 
4862 OverflowResult
4863 llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
4864  const DataLayout &DL, AssumptionCache *AC,
4865  const Instruction *CxtI,
4866  const DominatorTree *DT, bool UseInstrInfo) {
4867  // Multiplying n * m significant bits yields a result of n + m significant
4868  // bits. If the total number of significant bits does not exceed the
4869  // result bit width (minus 1), there is no overflow.
4870  // This means if we have enough leading sign bits in the operands
4871  // we can guarantee that the result does not overflow.
4872  // Ref: "Hacker's Delight" by Henry Warren
4873  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
4874 
4875  // Note that underestimating the number of sign bits gives a more
4876  // conservative answer.
4877  unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) +
4878  ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT);
4879 
4880  // First handle the easy case: if we have enough sign bits there's
4881  // definitely no overflow.
4882  if (SignBits > BitWidth + 1)
4883  return OverflowResult::NeverOverflows;
4884 
4885  // There are two ambiguous cases where there can be no overflow:
4886  // SignBits == BitWidth + 1 and
4887  // SignBits == BitWidth
4888  // The second case is difficult to check, therefore we only handle the
4889  // first case.
4890  if (SignBits == BitWidth + 1) {
4891  // It overflows only when both arguments are negative and the true
4892  // product is exactly 2^(BitWidth-1), which wraps to the minimum negative value.
4893  // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
4894  // For simplicity we just check if at least one side is not negative.
4895  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
4896  nullptr, UseInstrInfo);
4897  KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
4898  nullptr, UseInstrInfo);
4899  if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
4900  return OverflowResult::NeverOverflows;
4901  }
4902  return OverflowResult::MayOverflow;
4903 }
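// Illustrative sketch, not part of ValueTracking.cpp: the sign-bit rule above
// worked through for i16 operands (the operand values are hypothetical).
//
//   If both operands are known to fit in 8 signed bits, each contributes at
//   least 9 sign bits, so SignBits >= 18 > 16 + 1 and the result is
//   OverflowResult::NeverOverflows.
//
//   With SignBits == 17, overflow is only possible when both operands are
//   negative and the mathematical product is exactly 2^15, which wraps to the
//   i16 minimum: 0xff00 * 0xff80 = 0x8000 (as noted above). Proving either
//   side non-negative therefore rules it out.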
4904 
4905 OverflowResult llvm::computeOverflowForUnsignedAdd(
4906  const Value *LHS, const Value *RHS, const DataLayout &DL,
4907  AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
4908  bool UseInstrInfo) {
4909  ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
4910  LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
4911  nullptr, UseInstrInfo);
4912  ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
4913  RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
4914  nullptr, UseInstrInfo);
4915  return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
4916 }
4917 
4918 static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
4919  const Value *RHS,
4920  const AddOperator *Add,
4921