//===- InstCombineCompares.cpp --------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitICmp and visitFCmp functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

// How many times is a select replaced by one of its operands?
STATISTIC(NumSel, "Number of select opts");

/// Compute Result = In1+In2, returning true if the result overflowed for this
/// type.
static bool addWithOverflow(APInt &Result, const APInt &In1,
                            const APInt &In2, bool IsSigned = false) {
  bool Overflow;
  if (IsSigned)
    Result = In1.sadd_ov(In2, Overflow);
  else
    Result = In1.uadd_ov(In2, Overflow);

  return Overflow;
}

/// Compute Result = In1-In2, returning true if the result overflowed for this
/// type.
static bool subWithOverflow(APInt &Result, const APInt &In1,
                            const APInt &In2, bool IsSigned = false) {
  bool Overflow;
  if (IsSigned)
    Result = In1.ssub_ov(In2, Overflow);
  else
    Result = In1.usub_ov(In2, Overflow);

  return Overflow;
}

/// Given an icmp instruction, return true if any use of this comparison is a
/// branch on sign bit comparison.
static bool hasBranchUse(ICmpInst &I) {
  for (auto *U : I.users())
    if (isa<BranchInst>(U))
      return true;
  return false;
}

/// Given an exploded icmp instruction, return true if the comparison only
/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the
/// result of the comparison is true when the input value is signed.
static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
                           bool &TrueIfSigned) {
  switch (Pred) {
  case ICmpInst::ICMP_SLT:   // True if LHS s< 0
    TrueIfSigned = true;
    return RHS.isNullValue();
  case ICmpInst::ICMP_SLE:   // True if LHS s<= RHS and RHS == -1
    TrueIfSigned = true;
    return RHS.isAllOnesValue();
  case ICmpInst::ICMP_SGT:   // True if LHS s> -1
    TrueIfSigned = false;
    return RHS.isAllOnesValue();
  case ICmpInst::ICMP_UGT:
    // True if LHS u> RHS and RHS == high-bit-mask - 1
    TrueIfSigned = true;
    return RHS.isMaxSignedValue();
  case ICmpInst::ICMP_UGE:
    // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
    TrueIfSigned = true;
    return RHS.isSignMask();
  default:
    return false;
  }
}
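
// For illustration: on an i8 value X, each of these predicates is a pure
// sign-bit test, so isSignBitCheck() returns true with the noted flag:
//   icmp slt X, 0    -> TrueIfSigned = true
//   icmp sle X, -1   -> TrueIfSigned = true
//   icmp sgt X, -1   -> TrueIfSigned = false
//   icmp ugt X, 127  -> TrueIfSigned = true   (127 == high-bit-mask - 1)
//   icmp uge X, 128  -> TrueIfSigned = true   (128 == the i8 sign mask)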

/// Returns true if the exploded icmp can be expressed as a signed comparison
/// to zero and updates the predicate accordingly.
/// The signedness of the comparison is preserved.
/// TODO: Refactor with decomposeBitTestICmp()?
static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
  if (!ICmpInst::isSigned(Pred))
    return false;

  if (C.isNullValue())
    return ICmpInst::isRelational(Pred);

  if (C.isOneValue()) {
    if (Pred == ICmpInst::ICMP_SLT) {
      Pred = ICmpInst::ICMP_SLE;
      return true;
    }
  } else if (C.isAllOnesValue()) {
    if (Pred == ICmpInst::ICMP_SGT) {
      Pred = ICmpInst::ICMP_SGE;
      return true;
    }
  }

  return false;
}
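
// For illustration: isSignTest() turns a signed compare against a small
// constant into an equivalent compare against zero by adjusting Pred:
//   (X s< 1)  -> Pred becomes SLE, i.e. (X s<= 0)
//   (X s> -1) -> Pred becomes SGE, i.e. (X s>= 0)
//   (X s< 0)  -> already a sign test; Pred is left unchanged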

/// Given a signed integer type and a set of known zero and one bits, compute
/// the maximum and minimum values that could have the specified known zero and
/// known one bits, returning them in Min/Max.
/// TODO: Move to method on KnownBits struct?
static void ComputeSignedMinMaxValuesFromKnownBits(const KnownBits &Known,
                                                   APInt &Min, APInt &Max) {
  assert(Known.getBitWidth() == Min.getBitWidth() &&
         Known.getBitWidth() == Max.getBitWidth() &&
         "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
  APInt UnknownBits = ~(Known.Zero|Known.One);

  // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
  // bit if it is unknown.
  Min = Known.One;
  Max = Known.One|UnknownBits;

  if (UnknownBits.isNegative()) { // Sign bit is unknown
    Min.setSignBit();
    Max.clearSignBit();
  }
}

/// Given an unsigned integer type and a set of known zero and one bits, compute
/// the maximum and minimum values that could have the specified known zero and
/// known one bits, returning them in Min/Max.
/// TODO: Move to method on KnownBits struct?
static void ComputeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known,
                                                     APInt &Min, APInt &Max) {
  assert(Known.getBitWidth() == Min.getBitWidth() &&
         Known.getBitWidth() == Max.getBitWidth() &&
         "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
  APInt UnknownBits = ~(Known.Zero|Known.One);

  // The minimum value is when the unknown bits are all zeros.
  Min = Known.One;
  // The maximum value is when the unknown bits are all ones.
  Max = Known.One|UnknownBits;
}
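
// For illustration: on i8 with Known.Zero = 0x01 and Known.One = 0x02 (bit 0
// known zero, bit 1 known one, all other bits unknown, including the sign
// bit), UnknownBits = 0xFC and the two helpers above compute:
//   unsigned: Min = 0x02 (2),    Max = 0xFE (254)
//   signed:   Min = 0x82 (-126), Max = 0x7E (126)
// The signed bounds differ only in the unknown sign bit, which is set for
// Min and cleared for Max.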

/// This is called when we see this pattern:
///   cmp pred (load (gep GV, ...)), cmpcst
/// where GV is a global variable with a constant initializer. Try to simplify
/// this into some simple computation that does not need the load. For example
/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
///
/// If AndCst is non-null, then the loaded value is masked with that constant
/// before doing the comparison. This handles cases like "A[i]&4 == 0".
Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
                                                        GlobalVariable *GV,
                                                        CmpInst &ICI,
                                                        ConstantInt *AndCst) {
  Constant *Init = GV->getInitializer();
  if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
    return nullptr;

  uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
  // Don't blow up on huge arrays.
  if (ArrayElementCount > MaxArraySizeForCombine)
    return nullptr;

  // There are many forms of this optimization we can handle, for now, just do
  // the simple index into a single-dimensional array.
  //
  // Require: GEP GV, 0, i {{, constant indices}}
  if (GEP->getNumOperands() < 3 ||
      !isa<ConstantInt>(GEP->getOperand(1)) ||
      !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
      isa<Constant>(GEP->getOperand(2)))
    return nullptr;

  // Check that indices after the variable are constants and in-range for the
  // type they index. Collect the indices. This is typically for arrays of
  // structs.
  SmallVector<unsigned, 4> LaterIndices;

  Type *EltTy = Init->getType()->getArrayElementType();
  for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
    ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (!Idx) return nullptr;  // Variable index.

    uint64_t IdxVal = Idx->getZExtValue();
    if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.

    if (StructType *STy = dyn_cast<StructType>(EltTy))
      EltTy = STy->getElementType(IdxVal);
    else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
      if (IdxVal >= ATy->getNumElements()) return nullptr;
      EltTy = ATy->getElementType();
    } else {
      return nullptr; // Unknown type.
    }

    LaterIndices.push_back(IdxVal);
  }

  enum { Overdefined = -3, Undefined = -2 };

  // Variables for our state machines.

  // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
  // "i == 47 | i == 87", where 47 is the first index the condition is true for,
  // and 87 is the second (and last) index. FirstTrueElement is -2 when
  // undefined, otherwise set to the first true element. SecondTrueElement is
  // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
  int FirstTrueElement = Undefined, SecondTrueElement = Undefined;

  // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
  // form "i != 47 & i != 87". Same state transitions as for true elements.
  int FirstFalseElement = Undefined, SecondFalseElement = Undefined;

  /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
  /// define a state machine that triggers for ranges of values that the index
  /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
  /// This is -2 when undefined, -3 when overdefined, and otherwise the last
  /// index in the range (inclusive). We use -2 for undefined here because we
  /// use relative comparisons and don't want 0-1 to match -1.
  int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;

  // MagicBitvector - This is a magic bitvector where we set a bit if the
  // comparison is true for element 'i'. If there are 64 elements or less in
  // the array, this will fully represent all the comparison results.
  uint64_t MagicBitvector = 0;

  // Scan the array and see if one of our patterns matches.
  Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
  for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
    Constant *Elt = Init->getAggregateElement(i);
    if (!Elt) return nullptr;

    // If this is indexing an array of structures, get the structure element.
    if (!LaterIndices.empty())
      Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);

    // If the element is masked, handle it.
    if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);

    // Find out if the comparison would be true or false for the i'th element.
    Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
                                                  CompareRHS, DL, &TLI);
    // If the result is undef for this element, ignore it.
    if (isa<UndefValue>(C)) {
      // Extend range state machines to cover this element in case there is an
      // undef in the middle of the range.
      if (TrueRangeEnd == (int)i-1)
        TrueRangeEnd = i;
      if (FalseRangeEnd == (int)i-1)
        FalseRangeEnd = i;
      continue;
    }

    // If we can't compute the result for any of the elements, we have to give
    // up evaluating the entire conditional.
    if (!isa<ConstantInt>(C)) return nullptr;

    // Otherwise, we know if the comparison is true or false for this element,
    // update our state machines.
    bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();

    // State machine for single/double/range index comparison.
    if (IsTrueForElt) {
      // Update the TrueElement state machine.
      if (FirstTrueElement == Undefined)
        FirstTrueElement = TrueRangeEnd = i; // First true element.
      else {
        // Update double-compare state machine.
        if (SecondTrueElement == Undefined)
          SecondTrueElement = i;
        else
          SecondTrueElement = Overdefined;

        // Update range state machine.
        if (TrueRangeEnd == (int)i-1)
          TrueRangeEnd = i;
        else
          TrueRangeEnd = Overdefined;
      }
    } else {
      // Update the FalseElement state machine.
      if (FirstFalseElement == Undefined)
        FirstFalseElement = FalseRangeEnd = i; // First false element.
      else {
        // Update double-compare state machine.
        if (SecondFalseElement == Undefined)
          SecondFalseElement = i;
        else
          SecondFalseElement = Overdefined;

        // Update range state machine.
        if (FalseRangeEnd == (int)i-1)
          FalseRangeEnd = i;
        else
          FalseRangeEnd = Overdefined;
      }
    }

    // If this element is in range, update our magic bitvector.
    if (i < 64 && IsTrueForElt)
      MagicBitvector |= 1ULL << i;

    // If all of our states become overdefined, bail out early. Since the
    // predicate is expensive, only check it every 8 elements. This is only
    // really useful for really huge arrays.
    if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
        SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
        FalseRangeEnd == Overdefined)
      return nullptr;
  }

  // Now that we've scanned the entire array, emit our new comparison(s). We
  // order the state machines in order of the complexity of the generated code.
  Value *Idx = GEP->getOperand(2);

  // If the index is larger than the pointer size of the target, truncate the
  // index down like the GEP would do implicitly. We don't have to do this for
  // an inbounds GEP because the index can't be out of range.
  if (!GEP->isInBounds()) {
    Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
    unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
    if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
      Idx = Builder.CreateTrunc(Idx, IntPtrTy);
  }

  // If the comparison is only true for one or two elements, emit direct
  // comparisons.
  if (SecondTrueElement != Overdefined) {
    // None true -> false.
    if (FirstTrueElement == Undefined)
      return replaceInstUsesWith(ICI, Builder.getFalse());

    Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);

    // True for one element -> 'i == 47'.
    if (SecondTrueElement == Undefined)
      return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);

    // True for two elements -> 'i == 47 | i == 72'.
    Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
    Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
    Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx);
    return BinaryOperator::CreateOr(C1, C2);
  }

  // If the comparison is only false for one or two elements, emit direct
  // comparisons.
  if (SecondFalseElement != Overdefined) {
    // None false -> true.
    if (FirstFalseElement == Undefined)
      return replaceInstUsesWith(ICI, Builder.getTrue());

    Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);

    // False for one element -> 'i != 47'.
    if (SecondFalseElement == Undefined)
      return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);

    // False for two elements -> 'i != 47 & i != 72'.
    Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
    Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),
                                             SecondFalseElement);
    Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
    return BinaryOperator::CreateAnd(C1, C2);
  }

  // If the comparison can be replaced with a range comparison for the elements
  // where it is true, emit the range check.
  if (TrueRangeEnd != Overdefined) {
    assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");

    // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
    if (FirstTrueElement) {
      Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
      Idx = Builder.CreateAdd(Idx, Offs);
    }

    Value *End = ConstantInt::get(Idx->getType(),
                                  TrueRangeEnd-FirstTrueElement+1);
    return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
  }

  // False range check.
  if (FalseRangeEnd != Overdefined) {
    assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
    // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
    if (FirstFalseElement) {
      Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
      Idx = Builder.CreateAdd(Idx, Offs);
    }

    Value *End = ConstantInt::get(Idx->getType(),
                                  FalseRangeEnd-FirstFalseElement);
    return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
  }

  // If a magic bitvector captures the entire comparison state
  // of this load, replace it with computation that does:
  //   ((magic_cst >> i) & 1) != 0
  {
    Type *Ty = nullptr;

    // Look for an appropriate type:
    // - The type of Idx if the magic fits
    // - The smallest fitting legal type
    if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
      Ty = Idx->getType();
    else
      Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);

    if (Ty) {
      Value *V = Builder.CreateIntCast(Idx, Ty, false);
      V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
      V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
      return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
    }
  }

  return nullptr;
}
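
// For illustration: given a constant global
//   @foo = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 0]
// the pattern "icmp eq (load (gep @foo, 0, %i)), 0" is true only for element
// 3, so the scan above ends with FirstTrueElement == 3 and no second true
// element, and the fold emits "icmp eq %i, 3" with no load at all.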

/// Return a value that can be used to compare the *offset* implied by a GEP to
/// zero. For example, if we have &A[i], we want to return 'i' for
/// "icmp ne i, 0". Note that, in general, indices can be complex, and scales
/// are involved. The above expression would also be legal to codegen as
/// "icmp ne (i*4), 0" (assuming A is a pointer to i32).
/// This latter form is less amenable to optimization though, and we are allowed
/// to generate the first by knowing that pointer arithmetic doesn't overflow.
///
/// If we can't emit an optimized form for this expression, this returns null.
///
static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
                                          const DataLayout &DL) {
  gep_type_iterator GTI = gep_type_begin(GEP);

  // Check to see if this gep only has a single variable index. If so, and if
  // any constant indices are a multiple of its scale, then we can compute this
  // in terms of the scale of the variable index. For example, if the GEP
  // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
  // because the expression will cross zero at the same point.
  unsigned i, e = GEP->getNumOperands();
  int64_t Offset = 0;
  for (i = 1; i != e; ++i, ++GTI) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
      // Compute the aggregate offset of constant indices.
      if (CI->isZero()) continue;

      // Handle a struct index, which adds its field offset to the pointer.
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
      } else {
        uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
        Offset += Size*CI->getSExtValue();
      }
    } else {
      // Found our variable index.
      break;
    }
  }

  // If there are no variable indices, we must have a constant offset, just
  // evaluate it the general way.
  if (i == e) return nullptr;

  Value *VariableIdx = GEP->getOperand(i);
  // Determine the scale factor of the variable element. For example, this is
  // 4 if the variable index is into an array of i32.
  uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType());

  // Verify that there are no other variable indices. If so, emit the hard way.
  for (++i, ++GTI; i != e; ++i, ++GTI) {
    ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (!CI) return nullptr;

    // Compute the aggregate offset of constant indices.
    if (CI->isZero()) continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
    } else {
      uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
      Offset += Size*CI->getSExtValue();
    }
  }

  // Okay, we know we have a single variable index, which must be a
  // pointer/array/vector index. If there is no offset, life is simple, return
  // the index.
  Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType());
  unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
  if (Offset == 0) {
    // Cast to intptrty in case a truncation occurs. If an extension is needed,
    // we don't need to bother extending: the extension won't affect where the
    // computation crosses zero.
    if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
      VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy);
    }
    return VariableIdx;
  }

  // Otherwise, there is an index. The computation we will do will be modulo
  // the pointer size, so get it.
  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

  Offset &= PtrSizeMask;
  VariableScale &= PtrSizeMask;

  // To do this transformation, any constant index must be a multiple of the
  // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
  // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
  // multiple of the variable scale.
  int64_t NewOffs = Offset / (int64_t)VariableScale;
  if (Offset != NewOffs*(int64_t)VariableScale)
    return nullptr;

  // Okay, we can do this evaluation. Start by converting the index to intptr.
  if (VariableIdx->getType() != IntPtrTy)
    VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy,
                                           true /*Signed*/);
  Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
  return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset");
}
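
// For illustration: if a GEP implies the byte offset "12 + 4*%i" (a constant
// field offset of 12 plus a variable index into i32 elements), the constant
// 12 is a multiple of the scale 4, so the offset crosses zero exactly where
// "%i + 3" does, and the function above returns "add %i, 3" rather than
// materializing the scaled form "12 + 4*%i".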

/// Returns true if we can rewrite Start as a GEP with pointer Base
/// and some integer offset. The nodes that need to be re-written
/// for this transformation will be added to Explored.
static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
                                  const DataLayout &DL,
                                  SetVector<Value *> &Explored) {
  SmallVector<Value *, 16> WorkList(1, Start);
  Explored.insert(Base);

  // The following traversal gives us an order which can be used
  // when doing the final transformation. Since in the final
  // transformation we create the PHI replacement instructions first,
  // we don't have to get them in any particular order.
  //
  // However, for other instructions we will have to traverse the
  // operands of an instruction first, which means that we have to
  // do a post-order traversal.
  while (!WorkList.empty()) {
    SetVector<PHINode *> PHIs;

    while (!WorkList.empty()) {
      if (Explored.size() >= 100)
        return false;

      Value *V = WorkList.back();

      if (Explored.count(V) != 0) {
        WorkList.pop_back();
        continue;
      }

      if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
          !isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
        // We've found some value that we can't explore which is different from
        // the base. Therefore we can't do this transformation.
        return false;

      if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) {
        auto *CI = dyn_cast<CastInst>(V);
        if (!CI->isNoopCast(DL))
          return false;

        if (Explored.count(CI->getOperand(0)) == 0)
          WorkList.push_back(CI->getOperand(0));
      }

      if (auto *GEP = dyn_cast<GEPOperator>(V)) {
        // We're limiting the GEP to having one index. This will preserve
        // the original pointer type. We could handle more cases in the
        // future.
        if (GEP->getNumIndices() != 1 || !GEP->isInBounds() ||
            GEP->getType() != Start->getType())
          return false;

        if (Explored.count(GEP->getOperand(0)) == 0)
          WorkList.push_back(GEP->getOperand(0));
      }

      if (WorkList.back() == V) {
        WorkList.pop_back();
        // We've finished visiting this node, mark it as such.
        Explored.insert(V);
      }

      if (auto *PN = dyn_cast<PHINode>(V)) {
        // We cannot transform PHIs on unsplittable basic blocks.
        if (isa<CatchSwitchInst>(PN->getParent()->getTerminator()))
          return false;
        Explored.insert(PN);
        PHIs.insert(PN);
      }
    }

    // Explore the PHI nodes further.
    for (auto *PN : PHIs)
      for (Value *Op : PN->incoming_values())
        if (Explored.count(Op) == 0)
          WorkList.push_back(Op);
  }

  // Make sure that we can do this. Since we can't insert GEPs in a basic
  // block before a PHI node, we can't easily do this transformation if
  // we have PHI node users of transformed instructions.
  for (Value *Val : Explored) {
    for (Value *Use : Val->uses()) {

      auto *PHI = dyn_cast<PHINode>(Use);
      auto *Inst = dyn_cast<Instruction>(Val);

      if (Inst == Base || Inst == PHI || !Inst || !PHI ||
          Explored.count(PHI) == 0)
        continue;

      if (PHI->getParent() == Inst->getParent())
        return false;
    }
  }
  return true;
}

// Sets the appropriate insert point on Builder where we can add
// a replacement Instruction for V (if that is possible).
static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
                              bool Before = true) {
  if (auto *PHI = dyn_cast<PHINode>(V)) {
    Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt());
    return;
  }
  if (auto *I = dyn_cast<Instruction>(V)) {
    if (!Before)
      I = &*std::next(I->getIterator());
    Builder.SetInsertPoint(I);
    return;
  }
  if (auto *A = dyn_cast<Argument>(V)) {
    // Set the insertion point in the entry block.
    BasicBlock &Entry = A->getParent()->getEntryBlock();
    Builder.SetInsertPoint(&*Entry.getFirstInsertionPt());
    return;
  }
  // Otherwise, this is a constant and we don't need to set a new
  // insertion point.
  assert(isa<Constant>(V) && "Setting insertion point for unknown value!");
}

/// Returns a re-written value of Start as an indexed GEP using Base as a
/// pointer.
static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
                                 const DataLayout &DL,
                                 SetVector<Value *> &Explored) {
  // Perform all the substitutions. This is a bit tricky because we can
  // have cycles in our use-def chains.
  // 1. Create the PHI nodes without any incoming values.
  // 2. Create all the other values.
  // 3. Add the edges for the PHI nodes.
  // 4. Emit GEPs to get the original pointers.
  // 5. Remove the original instructions.
  Type *IndexType = IntegerType::get(
      Base->getContext(), DL.getIndexTypeSizeInBits(Start->getType()));

  DenseMap<Value *, Value *> NewInsts;
  NewInsts[Base] = ConstantInt::getNullValue(IndexType);

  // Create the new PHI nodes, without adding any incoming values.
  for (Value *Val : Explored) {
    if (Val == Base)
      continue;
    // Create empty phi nodes. This avoids cyclic dependencies when creating
    // the remaining instructions.
    if (auto *PHI = dyn_cast<PHINode>(Val))
      NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
                                      PHI->getName() + ".idx", PHI);
  }
  IRBuilder<> Builder(Base->getContext());

  // Create all the other instructions.
  for (Value *Val : Explored) {

    if (NewInsts.find(Val) != NewInsts.end())
      continue;

    if (auto *CI = dyn_cast<CastInst>(Val)) {
      NewInsts[CI] = NewInsts[CI->getOperand(0)];
      continue;
    }
    if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
      Value *Index = NewInsts[GEP->getOperand(1)] ? NewInsts[GEP->getOperand(1)]
                                                  : GEP->getOperand(1);
      setInsertionPoint(Builder, GEP);
      // Indices might need to be sign extended. GEPs will magically do
      // this, but we need to do it ourselves here.
      if (Index->getType()->getScalarSizeInBits() !=
          NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) {
        Index = Builder.CreateSExtOrTrunc(
            Index, NewInsts[GEP->getOperand(0)]->getType(),
            GEP->getOperand(0)->getName() + ".sext");
      }

      auto *Op = NewInsts[GEP->getOperand(0)];
      if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
        NewInsts[GEP] = Index;
      else
        NewInsts[GEP] = Builder.CreateNSWAdd(
            Op, Index, GEP->getOperand(0)->getName() + ".add");
      continue;
    }
    if (isa<PHINode>(Val))
      continue;

    llvm_unreachable("Unexpected instruction type");
  }

  // Add the incoming values to the PHI nodes.
  for (Value *Val : Explored) {
    if (Val == Base)
      continue;
    // All the instructions have been created, we can now add edges to the
    // phi nodes.
    if (auto *PHI = dyn_cast<PHINode>(Val)) {
      PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
      for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
        Value *NewIncoming = PHI->getIncomingValue(I);

        if (NewInsts.find(NewIncoming) != NewInsts.end())
          NewIncoming = NewInsts[NewIncoming];

        NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
      }
    }
  }

  for (Value *Val : Explored) {
    if (Val == Base)
      continue;

    // Depending on the type, for external users we have to emit
    // a GEP or a GEP + ptrtoint.
    setInsertionPoint(Builder, Val, false);

    // If required, create an inttoptr instruction for Base.
    Value *NewBase = Base;
    if (!Base->getType()->isPointerTy())
      NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(),
                                               Start->getName() + "to.ptr");

    Value *GEP = Builder.CreateInBoundsGEP(
        Start->getType()->getPointerElementType(), NewBase,
        makeArrayRef(NewInsts[Val]), Val->getName() + ".ptr");

    if (!Val->getType()->isPointerTy()) {
      Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(),
                                              Val->getName() + ".conv");
      GEP = Cast;
    }
    Val->replaceAllUsesWith(GEP);
  }

  return NewInsts[Start];
}

/// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express
/// the input Value as a constant indexed GEP. Returns a pair containing
/// the GEPs Pointer and Index.
static std::pair<Value *, Value *>
getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
  Type *IndexType = IntegerType::get(V->getContext(),
                                     DL.getIndexTypeSizeInBits(V->getType()));

  Constant *Index = ConstantInt::getNullValue(IndexType);
  while (true) {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      // We accept only inbounds GEPs here to exclude the possibility of
      // overflow.
      if (!GEP->isInBounds())
        break;
      if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 &&
          GEP->getType() == V->getType()) {
        V = GEP->getOperand(0);
        Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
        Index = ConstantExpr::getAdd(
            Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
        continue;
      }
      break;
    }
    if (auto *CI = dyn_cast<IntToPtrInst>(V)) {
      if (!CI->isNoopCast(DL))
        break;
      V = CI->getOperand(0);
      continue;
    }
    if (auto *CI = dyn_cast<PtrToIntInst>(V)) {
      if (!CI->isNoopCast(DL))
        break;
      V = CI->getOperand(0);
      continue;
    }
    break;
  }
  return {V, Index};
}
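
// For illustration: starting from %p2 in
//   %p1 = getelementptr inbounds i32, i32* %base, i64 2
//   %p2 = getelementptr inbounds i32, i32* %p1, i64 5
// the walk above looks through both inbounds GEPs and returns the pair
// {%base, 7}: the underlying pointer plus the accumulated constant index.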

/// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
/// We can look through PHIs, GEPs and casts in order to determine a common base
/// between GEPLHS and RHS.
static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
                                              ICmpInst::Predicate Cond,
                                              const DataLayout &DL) {
  if (!GEPLHS->hasAllConstantIndices())
    return nullptr;

  // Make sure the pointers have the same type.
  if (GEPLHS->getType() != RHS->getType())
    return nullptr;

  Value *PtrBase, *Index;
  std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);

  // The set of nodes that will take part in this transformation.
  SetVector<Value *> Nodes;

  if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
    return nullptr;

  // We know we can re-write this as
  //   ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2))
  // Since we've only looked through inbounds GEPs we know that we
  // can't have overflow on either side. We can therefore re-write
  // this as:
  //   OFFSET1 cmp OFFSET2
  Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes);

  // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
  // GEP having PtrBase as the pointer base, and has returned in NewRHS the
  // offset. Since Index is the offset of LHS to the base pointer, we will now
  // compare the offsets instead of comparing the pointers.
  return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS);
}

/// Fold comparisons between a GEP instruction and something else. At this point
/// we know that the GEP is on the LHS of the comparison.
Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
                                       ICmpInst::Predicate Cond,
                                       Instruction &I) {
  // Don't transform signed compares of GEPs into index compares. Even if the
  // GEP is inbounds, the final add of the base pointer can have signed overflow
  // and would change the result of the icmp.
  // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
  // the maximum signed value for the pointer type.
  if (ICmpInst::isSigned(Cond))
    return nullptr;

  // Look through bitcasts and addrspacecasts. We do not however want to remove
  // 0 GEPs.
  if (!isa<GetElementPtrInst>(RHS))
    RHS = RHS->stripPointerCasts();

  Value *PtrBase = GEPLHS->getOperand(0);
  if (PtrBase == RHS && GEPLHS->isInBounds()) {
    // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
    // This transformation (ignoring the base and scales) is valid because we
    // know pointers can't overflow since the gep is inbounds. See if we can
    // output an optimized form.
    Value *Offset = evaluateGEPOffsetExpression(GEPLHS, *this, DL);

    // If not, synthesize the offset the hard way.
    if (!Offset)
      Offset = EmitGEPOffset(GEPLHS);
    return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
                        Constant::getNullValue(Offset->getType()));
  } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
    // If the base pointers are different, but the indices are the same, just
    // compare the base pointer.
    if (PtrBase != GEPRHS->getOperand(0)) {
      bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
      IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
                        GEPRHS->getOperand(0)->getType();
      if (IndicesTheSame)
        for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
          if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
            IndicesTheSame = false;
            break;
          }

      // If all indices are the same, just compare the base pointers.
      if (IndicesTheSame)
        return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));

      // If we're comparing GEPs with two base pointers that only differ in type
      // and both GEPs have only constant indices or just one use, then fold
      // the compare with the adjusted indices.
      if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
          (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
          (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
          PtrBase->stripPointerCasts() ==
              GEPRHS->getOperand(0)->stripPointerCasts()) {
        Value *LOffset = EmitGEPOffset(GEPLHS);
        Value *ROffset = EmitGEPOffset(GEPRHS);

        // If we looked through an addrspacecast between different sized address
        // spaces, the LHS and RHS pointers are different sized
        // integers. Truncate to the smaller one.
        Type *LHSIndexTy = LOffset->getType();
        Type *RHSIndexTy = ROffset->getType();
        if (LHSIndexTy != RHSIndexTy) {
          if (LHSIndexTy->getPrimitiveSizeInBits() <
              RHSIndexTy->getPrimitiveSizeInBits()) {
            ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy);
          } else
            LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy);
        }

        Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond),
                                        LOffset, ROffset);
        return replaceInstUsesWith(I, Cmp);
      }

      // Otherwise, the base pointers are different and the indices are
      // different. Try to convert this to an indexed compare by looking through
      // PHIs/casts.
      return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
    }

    // If one of the GEPs has all zero indices, recurse.
    if (GEPLHS->hasAllZeroIndices())
      return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
                         ICmpInst::getSwappedPredicate(Cond), I);

    // If the other GEP has all zero indices, recurse.
    if (GEPRHS->hasAllZeroIndices())
      return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);

    bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
    if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
      // If the GEPs only differ by one index, compare it.
      unsigned NumDifferences = 0;  // Keep track of # differences.
      unsigned DiffOperand = 0;     // The operand that differs.
      for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
        if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
          if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
              GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
            // Irreconcilable differences.
            NumDifferences = 2;
            break;
          } else {
            if (NumDifferences++) break;
            DiffOperand = i;
          }
        }

      if (NumDifferences == 0)   // SAME GEP?
        return replaceInstUsesWith(I, // No comparison is needed here.
            ConstantInt::get(Type::getInt1Ty(I.getContext()),
                             ICmpInst::isTrueWhenEqual(Cond)));

      else if (NumDifferences == 1 && GEPsInBounds) {
        Value *LHSV = GEPLHS->getOperand(DiffOperand);
        Value *RHSV = GEPRHS->getOperand(DiffOperand);
        // Make sure we do a signed comparison here.
        return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
      }
    }

    // Only lower this if the icmp is the only user of the GEP or if we expect
    // the result to fold to a constant!
    if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
        (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
      // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2))  --->  (OFFSET1 cmp OFFSET2)
      Value *L = EmitGEPOffset(GEPLHS);
      Value *R = EmitGEPOffset(GEPRHS);
      return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
    }
  }

  // Try to convert this to an indexed compare by looking through PHIs/casts as
  // a last resort.
  return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
}
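
// For illustration: with a common base and exactly one differing index,
//   %p = getelementptr inbounds i32, i32* %base, i64 %i
//   %q = getelementptr inbounds i32, i32* %base, i64 %j
//   icmp ult i32* %p, %q
// is folded by the one-differing-operand case above into a compare of the
// indices themselves, "icmp slt i64 %i, %j" (the unsigned pointer predicate
// maps to the corresponding signed index predicate).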

Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI,
                                         const AllocaInst *Alloca,
                                         const Value *Other) {
  assert(ICI.isEquality() && "Cannot fold non-equality comparison.");

  // It would be tempting to fold away comparisons between allocas and any
  // pointer not based on that alloca (e.g. an argument). However, even
  // though such pointers cannot alias, they can still compare equal.
  //
  // But LLVM doesn't specify where allocas get their memory, so if the alloca
  // doesn't escape we can argue that it's impossible to guess its value, and we
  // can therefore act as if any such guesses are wrong.
  //
  // The code below checks that the alloca doesn't escape, and that it's only
  // used in a comparison once (the current instruction). The
  // single-comparison-use condition ensures that we're trivially folding all
  // comparisons against the alloca consistently, and avoids the risk of
  // erroneously folding a comparison of the pointer with itself.

  unsigned MaxIter = 32; // Break cycles and bound to constant-time.

  SmallVector<const Use *, 32> Worklist;
  for (const Use &U : Alloca->uses()) {
    if (Worklist.size() >= MaxIter)
      return nullptr;
    Worklist.push_back(&U);
  }

  unsigned NumCmps = 0;
  while (!Worklist.empty()) {
    assert(Worklist.size() <= MaxIter);
    const Use *U = Worklist.pop_back_val();
    const Value *V = U->getUser();
    --MaxIter;

    if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
        isa<SelectInst>(V)) {
      // Track the uses.
    } else if (isa<LoadInst>(V)) {
      // Loading from the pointer doesn't escape it.
      continue;
    } else if (const auto *SI = dyn_cast<StoreInst>(V)) {
      // Storing *to* the pointer is fine, but storing the pointer escapes it.
      if (SI->getValueOperand() == U->get())
        return nullptr;
      continue;
    } else if (isa<ICmpInst>(V)) {
      if (NumCmps++)
        return nullptr; // Found more than one cmp.
      continue;
    } else if (const auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
      switch (Intrin->getIntrinsicID()) {
        // These intrinsics don't escape or compare the pointer. Memset is safe
        // because we don't allow ptrtoint. Memcpy and memmove are safe because
        // we don't allow stores, so src cannot point to V.
        case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
        case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
          continue;
        default:
          return nullptr;
      }
    } else {
      return nullptr;
    }
    for (const Use &U : V->uses()) {
      if (Worklist.size() >= MaxIter)
        return nullptr;
      Worklist.push_back(&U);
    }
  }

  Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
  return replaceInstUsesWith(
      ICI,
      ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
}

/// Fold "icmp pred (X+C), X".
Instruction *InstCombiner::foldICmpAddOpConst(Value *X, const APInt &C,
                                              ICmpInst::Predicate Pred) {
  // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
  // so the values can never be equal. Similarly for all other "or equals"
  // operators.
  assert(!!C && "C should not be zero!");

  // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
  // (X+2) <u X        --> X >u (MAXUINT-2)        --> X u> 253
  // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
  if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
    Constant *R = ConstantInt::get(X->getType(),
                                   APInt::getMaxValue(C.getBitWidth()) - C);
    return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
  }

  // (X+1) >u X        --> X <u (0-1)        --> X != 255
  // (X+2) >u X        --> X <u (0-2)        --> X <u 254
  // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
    return new ICmpInst(ICmpInst::ICMP_ULT, X,
                        ConstantInt::get(X->getType(), -C));

  APInt SMax = APInt::getSignedMaxValue(C.getBitWidth());

  // (X+ 1) <s X       --> X >s (MAXSINT-1)          --> X == 127
  // (X+ 2) <s X       --> X >s (MAXSINT-2)          --> X >s 125
  // (X+MAXSINT) <s X  --> X >s (MAXSINT-MAXSINT)    --> X >s 0
  // (X+MINSINT) <s X  --> X >s (MAXSINT-MINSINT)    --> X >s -1
  // (X+ -2) <s X      --> X >s (MAXSINT- -2)        --> X >s 126
  // (X+ -1) <s X      --> X >s (MAXSINT- -1)        --> X != 127
  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
    return new ICmpInst(ICmpInst::ICMP_SGT, X,
                        ConstantInt::get(X->getType(), SMax - C));

  // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))       --> X != 127
  // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))       --> X <s 126
  // (X+MAXSINT) >s X  --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
  // (X+MINSINT) >s X  --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
  // (X+ -2) >s X      --> X <s (MAXSINT-(-2-1))      --> X <s -126
  // (X+ -1) >s X      --> X <s (MAXSINT-(-1-1))      --> X == -128

  assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
  return new ICmpInst(ICmpInst::ICMP_SLT, X,
                      ConstantInt::get(X->getType(), SMax - (C - 1)));
}

/// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
/// (icmp eq/ne A, Log2(AP2/AP1)) ->
/// (icmp eq/ne A, Log2(AP2) - Log2(AP1)).
Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A,
                                                 const APInt &AP1,
                                                 const APInt &AP2) {
  assert(I.isEquality() && "Cannot fold icmp gt/lt");

  auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
    if (I.getPredicate() == I.ICMP_NE)
      Pred = CmpInst::getInversePredicate(Pred);
    return new ICmpInst(Pred, LHS, RHS);
  };

  // Don't bother doing any work for cases which InstSimplify handles.
  if (AP2.isNullValue())
    return nullptr;

  bool IsAShr = isa<AShrOperator>(I.getOperand(0));
  if (IsAShr) {
    if (AP2.isAllOnesValue())
      return nullptr;
    if (AP2.isNegative() != AP1.isNegative())
      return nullptr;
    if (AP2.sgt(AP1))
      return nullptr;
  }

  if (!AP1)
    // 'A' must be large enough to shift out the highest set bit.
    return getICmp(I.ICMP_UGT, A,
                   ConstantInt::get(A->getType(), AP2.logBase2()));

  if (AP1 == AP2)
    return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));

  int Shift;
  if (IsAShr && AP1.isNegative())
    Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
  else
    Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();

  if (Shift > 0) {
    if (IsAShr && AP1 == AP2.ashr(Shift)) {
      // There are multiple solutions if we are comparing against -1 and the LHS
      // of the ashr is not a power of two.
      if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
        return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
      return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
    } else if (AP1 == AP2.lshr(Shift)) {
      return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
    }
  }

  // Shifting const2 will never be equal to const1.
  // FIXME: This should always be handled by InstSimplify?
  auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
  return replaceInstUsesWith(I, TorF);
}
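
// For illustration: "icmp eq (lshr i32 64, %A), 16" can only hold for the
// shift amount that moves 64 (0b1000000) down to 16 (0b10000). The leading-
// zero distance is 27 - 25 == 2 and 64 >> 2 == 16, so the fold above
// produces "icmp eq i32 %A, 2".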

/// Handle "(icmp eq/ne (shl AP2, A), AP1)" ->
/// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)).
Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A,
                                                 const APInt &AP1,
                                                 const APInt &AP2) {
  assert(I.isEquality() && "Cannot fold icmp gt/lt");

  auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
    if (I.getPredicate() == I.ICMP_NE)
      Pred = CmpInst::getInversePredicate(Pred);
    return new ICmpInst(Pred, LHS, RHS);
  };

  // Don't bother doing any work for cases which InstSimplify handles.
  if (AP2.isNullValue())
    return nullptr;

  unsigned AP2TrailingZeros = AP2.countTrailingZeros();

  if (!AP1 && AP2TrailingZeros != 0)
    return getICmp(
        I.ICMP_UGE, A,
        ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));

  if (AP1 == AP2)
    return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));

  // Get the distance between the lowest bits that are set.
  int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;

  if (Shift > 0 && AP2.shl(Shift) == AP1)
    return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));

  // Shifting const2 will never be equal to const1.
  // FIXME: This should always be handled by InstSimplify?
  auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
  return replaceInstUsesWith(I, TorF);
}
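
// For illustration: "icmp eq (shl i32 4, %A), 16" holds exactly when the
// shift distance equals the gap between the low set bits,
// countTrailingZeros(16) - countTrailingZeros(4) == 4 - 2 == 2, and
// 4 << 2 == 16, so the fold above produces "icmp eq i32 %A, 2".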

/// The caller has matched a pattern of the form:
///   I = icmp ugt (add (add A, B), CI2), CI1
/// If this is of the form:
///   sum = a + b
///   if (sum+128 >u 255)
/// Then replace it with llvm.sadd.with.overflow.i8.
///
static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
                                          ConstantInt *CI2, ConstantInt *CI1,
                                          InstCombiner &IC) {
  // The transformation we're trying to do here is to transform this into an
  // llvm.sadd.with.overflow. To do this, we have to replace the original add
  // with a narrower add, and discard the add-with-constant that is part of the
  // range check (if we can't eliminate it, this isn't profitable).

  // In order to eliminate the add-with-constant, the compare can be its only
  // use.
  Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
  if (!AddWithCst->hasOneUse())
    return nullptr;

  // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
  if (!CI2->getValue().isPowerOf2())
    return nullptr;
  unsigned NewWidth = CI2->getValue().countTrailingZeros();
  if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31)
    return nullptr;

  // The width of the new add formed is 1 more than the bias.
  ++NewWidth;

  // Check to see that CI1 is an all-ones value with NewWidth bits.
  if (CI1->getBitWidth() == NewWidth ||
      CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
    return nullptr;

  // This is only really a signed overflow check if the inputs have been
  // sign-extended; check for that condition. For example, if CI2 is 2^31 and
  // the operands of the add are 64 bits wide, we need at least 33 sign bits.
  unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
  if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
      IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
    return nullptr;

  // In order to replace the original add with a narrower
  // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
  // and truncates that discard the high bits of the add. Verify that this is
  // the case.
  Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
  for (User *U : OrigAdd->users()) {
    if (U == AddWithCst)
      continue;

    // Only accept truncates for now. We would really like a nice recursive
    // predicate like SimplifyDemandedBits, but which goes downwards the use-def
    // chain to see which bits of a value are actually demanded. If the
    // original add had another add which was then immediately truncated, we
    // could still do the transformation.
    TruncInst *TI = dyn_cast<TruncInst>(U);
    if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
      return nullptr;
  }

  // If the pattern matches, truncate the inputs to the narrower type and
  // use the sadd_with_overflow intrinsic to efficiently compute both the
  // result and the overflow bit.
  Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
  Value *F = Intrinsic::getDeclaration(I.getModule(),
                                       Intrinsic::sadd_with_overflow, NewType);

  InstCombiner::BuilderTy &Builder = IC.Builder;

  // Put the new code above the original add, in case there are any uses of the
  // add between the add and the compare.
  Builder.SetInsertPoint(OrigAdd);

  Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc");
  Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc");
  CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd");
  Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result");
  Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType());

  // The inner add was the result of the narrow add, zero extended to the
  // wider type. Replace it with the result computed by the intrinsic.
  IC.replaceInstUsesWith(*OrigAdd, ZExt);

  // The original icmp gets replaced with the overflow value.
  return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
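
// For illustration: when %a and %b hold sign-extended i8 values, the idiom
//   %sum = add i32 %a, %b
//   %off = add i32 %sum, 128
//   %cmp = icmp ugt i32 %off, 255
// matches with CI2 == 128 (2^7) and CI1 == 255, so NewWidth becomes 8 and the
// code above emits
//   %sadd = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %ta, i8 %tb)
// on truncated inputs; %cmp is replaced by the i1 overflow field, and %sum by
// the zero-extended i8 result (safe because every other use of %sum is a
// truncate that discards the high bits).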

// Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
  CmpInst::Predicate Pred = Cmp.getPredicate();
  Value *X = Cmp.getOperand(0);

  if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) {
    Value *A, *B;
    SelectPatternResult SPR = matchSelectPattern(X, A, B);
    if (SPR.Flavor == SPF_SMIN) {
      if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
        return new ICmpInst(Pred, B, Cmp.getOperand(1));
      if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT))
        return new ICmpInst(Pred, A, Cmp.getOperand(1));
    }
  }
  return nullptr;
}

// Fold icmp Pred X, C.
Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
  CmpInst::Predicate Pred = Cmp.getPredicate();
  Value *X = Cmp.getOperand(0);

  const APInt *C;
  if (!match(Cmp.getOperand(1), m_APInt(C)))
    return nullptr;

  Value *A = nullptr, *B = nullptr;

  // Match the following pattern, which is a common idiom when writing
  // overflow-safe integer arithmetic functions. The source performs an addition
  // in wider type and explicitly checks for overflow using comparisons against
  // INT_MIN and INT_MAX. Simplify by using the sadd_with_overflow intrinsic.
  //
  // TODO: This could probably be generalized to handle other overflow-safe
  // operations if we worked out the formulas to compute the appropriate magic
  // constants.
  //
  // sum = a + b
  // if (sum+128 >u 255)  ...  -> llvm.sadd.with.overflow.i8
  {
    ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
    if (Pred == ICmpInst::ICMP_UGT &&
        match(X, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
      if (Instruction *Res = processUGT_ADDCST_ADD(
              Cmp, A, B, CI2, cast<ConstantInt>(Cmp.getOperand(1)), *this))
        return Res;
  }

  // FIXME: Use m_APInt to allow folds for splat constants.
  ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1));
  if (!CI)
    return nullptr;

  // Canonicalize icmp instructions based on dominating conditions.
  BasicBlock *Parent = Cmp.getParent();
  BasicBlock *Dom = Parent->getSinglePredecessor();
  auto *BI = Dom ? dyn_cast<BranchInst>(Dom->getTerminator()) : nullptr;
  ICmpInst::Predicate Pred2;
  BasicBlock *TrueBB, *FalseBB;
  ConstantInt *CI2;
  if (BI && match(BI, m_Br(m_ICmp(Pred2, m_Specific(X), m_ConstantInt(CI2)),
                           TrueBB, FalseBB)) &&
      TrueBB != FalseBB) {
    ConstantRange CR =
        ConstantRange::makeAllowedICmpRegion(Pred, CI->getValue());
    ConstantRange DominatingCR =
        (Parent == TrueBB) ? ConstantRange::makeExactICmpRegion(
                                 Pred2, CI2->getValue())
                           : ConstantRange::makeExactICmpRegion(
                                 CmpInst::getInversePredicate(Pred2),
                                 CI2->getValue());
    ConstantRange Intersection = DominatingCR.intersectWith(CR);
    ConstantRange Difference = DominatingCR.difference(CR);
    if (Intersection.isEmptySet())
      return replaceInstUsesWith(Cmp, Builder.getFalse());
    if (Difference.isEmptySet())
      return replaceInstUsesWith(Cmp, Builder.getTrue());

    // If this is a normal comparison, it demands all bits. If it is a sign
    // bit comparison, it only demands the sign bit.
    bool UnusedBit;
    bool IsSignBit = isSignBitCheck(Pred, CI->getValue(), UnusedBit);

    // Canonicalizing a sign bit comparison that gets used in a branch
    // pessimizes codegen by generating a branch-on-zero instruction instead
    // of a test-and-branch. So we avoid canonicalizing in such situations
    // because a test-and-branch instruction has better branch displacement
    // than a compare-and-branch instruction.
    if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
      return nullptr;

    if (auto *AI = Intersection.getSingleElement())
      return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI));
    if (auto *AD = Difference.getSingleElement())
      return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD));
  }

  return nullptr;
}
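
// For illustration of the dominating-condition logic: if this block is only
// reached when "%x u< 10" held in the single predecessor, then inside it
// "icmp ult i32 %x, 11" has an empty difference with the dominating range
// [0, 10) and folds to true, while "icmp ugt i32 %x, 20" has an empty
// intersection with it and folds to false.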

/// Fold icmp (trunc X, Y), C.
Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp,
                                                 TruncInst *Trunc,
                                                 const APInt &C) {
  ICmpInst::Predicate Pred = Cmp.getPredicate();
  Value *X = Trunc->getOperand(0);
  if (C.isOneValue() && C.getBitWidth() > 1) {
    // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
    Value *V = nullptr;
    if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
      return new ICmpInst(ICmpInst::ICMP_SLT, V,
                          ConstantInt::get(V->getType(), 1));
  }

  if (Cmp.isEquality() && Trunc->hasOneUse()) {
    // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
    // of the high bits truncated out of x are known.
    unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
             SrcBits = X->getType()->getScalarSizeInBits();
    KnownBits Known = computeKnownBits(X, 0, &Cmp);

    // If all the high bits are known, we can do this xform.
    if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) {
      // Pull in the high bits from known-ones set.
      APInt NewRHS = C.zext(SrcBits);
      NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
      return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS));
    }
  }

  return nullptr;
}
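
// For illustration: in "icmp eq (trunc i32 %x to i8), 42", if the top 24
// bits of %x are all known and equal to 0x000001, the known-one bits are
// pulled into the constant and the compare is done at full width:
// "icmp eq i32 %x, 298" (298 == 0x12A == (1 << 8) | 42), letting the trunc
// die.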
1451 
1452 /// Fold icmp (xor X, Y), C.
1453 Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
1454  BinaryOperator *Xor,
1455  const APInt &C) {
1456  Value *X = Xor->getOperand(0);
1457  Value *Y = Xor->getOperand(1);
1458  const APInt *XorC;
1459  if (!match(Y, m_APInt(XorC)))
1460  return nullptr;
1461 
1462  // If this is a comparison that tests the signbit (X < 0) or (X > -1),
1463  // fold the xor.
1464  ICmpInst::Predicate Pred = Cmp.getPredicate();
1465  bool TrueIfSigned = false;
1466  if (isSignBitCheck(Cmp.getPredicate(), C, TrueIfSigned)) {
1467 
1468  // If the sign bit of the XorCst is not set, there is no change to
1469  // the operation, just stop using the Xor.
1470  if (!XorC->isNegative()) {
1471  Cmp.setOperand(0, X);
1472  Worklist.Add(Xor);
1473  return &Cmp;
1474  }
1475 
1476  // Emit the opposite comparison.
1477  if (TrueIfSigned)
1478  return new ICmpInst(ICmpInst::ICMP_SGT, X,
1479  ConstantInt::getAllOnesValue(X->getType()));
1480  else
1481  return new ICmpInst(ICmpInst::ICMP_SLT, X,
1482  ConstantInt::getNullValue(X->getType()));
1483  }
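  // For example, when the xor constant is the sign mask:
  //   icmp slt (xor i32 %x, -2147483648), 0 --> icmp sgt i32 %x, -1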
1484 
1485  if (Xor->hasOneUse()) {
1486  // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask))
1487  if (!Cmp.isEquality() && XorC->isSignMask()) {
1488  Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
1489  : Cmp.getSignedPredicate();
1490  return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1491  }
1492 
1493  // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask))
1494  if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
1495  Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
1496  : Cmp.getSignedPredicate();
1497  Pred = Cmp.getSwappedPredicate(Pred);
1498  return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1499  }
1500  }
1501 
1502  // Mask constant magic can eliminate an 'xor' with unsigned compares.
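  // For example, with C = 7 (so C + 1 = 8 is a power of 2):
  //   (xor X, 7) u> 7 --> X u> 7
  // and with C = 8 (a power of 2):
  //   (xor X, -8) u< 8 --> X u> -9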
1503  if (Pred == ICmpInst::ICMP_UGT) {
1504  // (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2)
1505  if (*XorC == ~C && (C + 1).isPowerOf2())
1506  return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
1507  // (xor X, C) >u C --> X >u C (when C+1 is a power of 2)
1508  if (*XorC == C && (C + 1).isPowerOf2())
1509  return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
1510  }
1511  if (Pred == ICmpInst::ICMP_ULT) {
1512  // (xor X, -C) <u C --> X >u ~C (when C is a power of 2)
1513  if (*XorC == -C && C.isPowerOf2())
1514  return new ICmpInst(ICmpInst::ICMP_UGT, X,
1515  ConstantInt::get(X->getType(), ~C));
1516  // (xor X, C) <u C --> X >u ~C (when -C is a power of 2)
1517  if (*XorC == C && (-C).isPowerOf2())
1518  return new ICmpInst(ICmpInst::ICMP_UGT, X,
1519  ConstantInt::get(X->getType(), ~C));
1520  }
1521  return nullptr;
1522 }
1523 
1524 /// Fold icmp (and (sh X, Y), C2), C1.
1525 Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
1526  const APInt &C1, const APInt &C2) {
1527  BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0));
1528  if (!Shift || !Shift->isShift())
1529  return nullptr;
1530 
1531  // If this is: (X >> C3) & C2 != C1 (where any shift and any compare could
1532  // exist), turn it into (X & (C2 << C3)) != (C1 << C3). This happens a LOT in
1533  // code produced by the clang front-end, for bitfield access.
1534  // This seemingly simple opportunity to fold away a shift turns out to be
1535  // rather complicated. See PR17827 for details.
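  // For example, with C3 = 3, C2 = 15, C1 = 4:
  //   ((X u>> 3) & 15) != 4 --> (X & 120) != 32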
1536  unsigned ShiftOpcode = Shift->getOpcode();
1537  bool IsShl = ShiftOpcode == Instruction::Shl;
1538  const APInt *C3;
1539  if (match(Shift->getOperand(1), m_APInt(C3))) {
1540  bool CanFold = false;
1541  if (ShiftOpcode == Instruction::Shl) {
1542  // For a left shift, we can fold if the comparison is not signed. We can
1543  // also fold a signed comparison if the mask value and comparison value
1544  // are not negative. These constraints may not be obvious, but we can
1545  // prove that they are correct using an SMT solver.
1546  if (!Cmp.isSigned() || (!C2.isNegative() && !C1.isNegative()))
1547  CanFold = true;
1548  } else {
1549  bool IsAshr = ShiftOpcode == Instruction::AShr;
1550  // For a logical right shift, we can fold if the comparison is not signed.
1551  // We can also fold a signed comparison if the shifted mask value and the
1552  // shifted comparison value are not negative. These constraints may not be
1553  // obvious, but we can prove that they are correct using an SMT solver.
1554  // For an arithmetic shift right we can do the same, if we ensure
1555  // the And doesn't use any bits being shifted in. Normally these would
1556  // be turned into lshr by SimplifyDemandedBits, but not if there is an
1557  // additional user.
1558  if (!IsAshr || (C2.shl(*C3).lshr(*C3) == C2)) {
1559  if (!Cmp.isSigned() ||
1560  (!C2.shl(*C3).isNegative() && !C1.shl(*C3).isNegative()))
1561  CanFold = true;
1562  }
1563  }
1564 
1565  if (CanFold) {
1566  APInt NewCst = IsShl ? C1.lshr(*C3) : C1.shl(*C3);
1567  APInt SameAsC1 = IsShl ? NewCst.shl(*C3) : NewCst.lshr(*C3);
1568  // Check to see if we are shifting out any of the bits being compared.
1569  if (SameAsC1 != C1) {
1570  // If we shifted bits out, the fold is not going to work out. As a
1571  // special case, check to see if this means that the result is always
1572  // true or false now.
1573  if (Cmp.getPredicate() == ICmpInst::ICMP_EQ)
1574  return replaceInstUsesWith(Cmp, ConstantInt::getFalse(Cmp.getType()));
1575  if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
1576  return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType()));
1577  } else {
1578  Cmp.setOperand(1, ConstantInt::get(And->getType(), NewCst));
1579  APInt NewAndCst = IsShl ? C2.lshr(*C3) : C2.shl(*C3);
1580  And->setOperand(1, ConstantInt::get(And->getType(), NewAndCst));
1581  And->setOperand(0, Shift->getOperand(0));
1582  Worklist.Add(Shift); // Shift is dead.
1583  return &Cmp;
1584  }
1585  }
1586  }
1587 
1588  // Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is
1589  // preferable because it allows the C2 << Y expression to be hoisted out of a
1590  // loop if Y is invariant and X is not.
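  // For example: ((X u>> Y) & 4) == 0 --> (X & (4 << Y)) == 0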
1591  if (Shift->hasOneUse() && C1.isNullValue() && Cmp.isEquality() &&
1592  !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
1593  // Compute C2 << Y.
1594  Value *NewShift =
1595  IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1))
1596  : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1));
1597 
1598  // Compute X & (C2 << Y).
1599  Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift);
1600  Cmp.setOperand(0, NewAnd);
1601  return &Cmp;
1602  }
1603 
1604  return nullptr;
1605 }
1606 
1607 /// Fold icmp (and X, C2), C1.
1608 Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
1609  BinaryOperator *And,
1610  const APInt &C1) {
1611  const APInt *C2;
1612  if (!match(And->getOperand(1), m_APInt(C2)))
1613  return nullptr;
1614 
1615  if (!And->hasOneUse())
1616  return nullptr;
1617 
1618  // If the LHS is an 'and' of a truncate and we can widen the and/compare to
1619  // the input width without changing the value produced, eliminate the cast:
1620  //
1621  // icmp (and (trunc W), C2), C1 -> icmp (and W, C2'), C1'
1622  //
1623  // We can do this transformation if the constants do not have their sign bits
1624  // set or if it is an equality comparison. Extending a relational comparison
1625  // when we're checking the sign bit would not work.
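  // For example:
  //   icmp ult (and (trunc i64 %w to i32), 255), 17
  //     --> icmp ult (and i64 %w, 255), 17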
1626  Value *W;
1627  if (match(And->getOperand(0), m_OneUse(m_Trunc(m_Value(W)))) &&
1628  (Cmp.isEquality() || (!C1.isNegative() && !C2->isNegative()))) {
1629  // TODO: Is this a good transform for vectors? Wider types may reduce
1630  // throughput. Should this transform be limited (even for scalars) by using
1631  // shouldChangeType()?
1632  if (!Cmp.getType()->isVectorTy()) {
1633  Type *WideType = W->getType();
1634  unsigned WideScalarBits = WideType->getScalarSizeInBits();
1635  Constant *ZextC1 = ConstantInt::get(WideType, C1.zext(WideScalarBits));
1636  Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits));
1637  Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName());
1638  return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1);
1639  }
1640  }
1641 
1642  if (Instruction *I = foldICmpAndShift(Cmp, And, C1, *C2))
1643  return I;
1644 
1645  // (icmp pred (and (or (lshr A, B), A), 1), 0) -->
1646  // (icmp pred (and A, (or (shl 1, B), 1)), 0)
1647  //
1648  // iff pred isn't signed
1649  if (!Cmp.isSigned() && C1.isNullValue() && And->getOperand(0)->hasOneUse() &&
1650  match(And->getOperand(1), m_One())) {
1651  Constant *One = cast<Constant>(And->getOperand(1));
1652  Value *Or = And->getOperand(0);
1653  Value *A, *B, *LShr;
1654  if (match(Or, m_Or(m_Value(LShr), m_Value(A))) &&
1655  match(LShr, m_LShr(m_Specific(A), m_Value(B)))) {
1656  unsigned UsesRemoved = 0;
1657  if (And->hasOneUse())
1658  ++UsesRemoved;
1659  if (Or->hasOneUse())
1660  ++UsesRemoved;
1661  if (LShr->hasOneUse())
1662  ++UsesRemoved;
1663 
1664  // Compute A & ((1 << B) | 1)
1665  Value *NewOr = nullptr;
1666  if (auto *C = dyn_cast<Constant>(B)) {
1667  if (UsesRemoved >= 1)
1668  NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
1669  } else {
1670  if (UsesRemoved >= 3)
1671  NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(),
1672  /*HasNUW=*/true),
1673  One, Or->getName());
1674  }
1675  if (NewOr) {
1676  Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName());
1677  Cmp.setOperand(0, NewAnd);
1678  return &Cmp;
1679  }
1680  }
1681  }
1682 
1683  return nullptr;
1684 }
1685 
1686 /// Fold icmp (and X, Y), C.
1687 Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
1688  BinaryOperator *And,
1689  const APInt &C) {
1690  if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
1691  return I;
1692 
1693  // TODO: These all require that Y is constant too, so refactor with the above.
1694 
1695  // Try to optimize things like "A[i] & 42 == 0" to index computations.
1696  Value *X = And->getOperand(0);
1697  Value *Y = And->getOperand(1);
1698  if (auto *LI = dyn_cast<LoadInst>(X))
1699  if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
1700  if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
1701  if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
1702  !LI->isVolatile() && isa<ConstantInt>(Y)) {
1703  ConstantInt *C2 = cast<ConstantInt>(Y);
1704  if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2))
1705  return Res;
1706  }
1707 
1708  if (!Cmp.isEquality())
1709  return nullptr;
1710 
1711  // X & -C == -C -> X > u ~C
1712  // X & -C != -C -> X <= u ~C
1713  // iff C is a power of 2
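  // For example, with C = 4:
  //   (X & -4) == -4 --> X u> -5
  //   (X & -4) != -4 --> X u<= -5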
1714  if (Cmp.getOperand(1) == Y && (-C).isPowerOf2()) {
1715  auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT
1716  : CmpInst::ICMP_ULE;
1717  return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
1718  }
1719 
1720  // (X & C2) == 0 -> (trunc X) >= 0
1721  // (X & C2) != 0 -> (trunc X) < 0
1722  // iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
1723  const APInt *C2;
1724  if (And->hasOneUse() && C.isNullValue() && match(Y, m_APInt(C2))) {
1725  int32_t ExactLogBase2 = C2->exactLogBase2();
1726  if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
1727  Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
1728  if (And->getType()->isVectorTy())
1729  NTy = VectorType::get(NTy, And->getType()->getVectorNumElements());
1730  Value *Trunc = Builder.CreateTrunc(X, NTy);
1731  auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE
1732  : CmpInst::ICMP_SLT;
1733  return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy));
1734  }
1735  }
1736 
1737  return nullptr;
1738 }
1739 
1740 /// Fold icmp (or X, Y), C.
1741 Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
1742  const APInt &C) {
1743  ICmpInst::Predicate Pred = Cmp.getPredicate();
1744  if (C.isOneValue()) {
1745  // icmp slt signum(V) 1 --> icmp slt V, 1
1746  Value *V = nullptr;
1747  if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
1748  return new ICmpInst(ICmpInst::ICMP_SLT, V,
1749  ConstantInt::get(V->getType(), 1));
1750  }
1751 
1752  // X | C == C --> X <=u C
1753  // X | C != C --> X >u C
1754  // iff C+1 is a power of 2 (C is a bitmask of the low bits)
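  // For example: (X | 7) == 7 --> X u<= 7 and (X | 7) != 7 --> X u> 7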
1755  if (Cmp.isEquality() && Cmp.getOperand(1) == Or->getOperand(1) &&
1756  (C + 1).isPowerOf2()) {
1757  Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
1758  return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1));
1759  }
1760 
1761  if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse())
1762  return nullptr;
1763 
1764  Value *P, *Q;
1765  if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
1766  // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
1767  // -> and (icmp eq P, null), (icmp eq Q, null).
1768  Value *CmpP =
1769  Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
1770  Value *CmpQ =
1771  Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
1772  auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
1773  return BinaryOperator::Create(BOpc, CmpP, CmpQ);
1774  }
1775 
1776  // Are we using xors to bitwise check for a pair of (in)equalities? Convert to
1777  // a shorter form that has more potential to be folded even further.
1778  Value *X1, *X2, *X3, *X4;
1779  if (match(Or->getOperand(0), m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
1780  match(Or->getOperand(1), m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
1781  // ((X1 ^ X2) | (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
1782  // ((X1 ^ X2) | (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
1783  Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2);
1784  Value *Cmp34 = Builder.CreateICmp(Pred, X3, X4);
1785  auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
1786  return BinaryOperator::Create(BOpc, Cmp12, Cmp34);
1787  }
1788 
1789  return nullptr;
1790 }
1791 
1792 /// Fold icmp (mul X, Y), C.
1793 Instruction *InstCombiner::foldICmpMulConstant(ICmpInst &Cmp,
1794  BinaryOperator *Mul,
1795  const APInt &C) {
1796  const APInt *MulC;
1797  if (!match(Mul->getOperand(1), m_APInt(MulC)))
1798  return nullptr;
1799 
1800  // If this is a test of the sign bit and the multiply is sign-preserving with
1801  // a constant operand, use the multiply LHS operand instead.
1802  ICmpInst::Predicate Pred = Cmp.getPredicate();
1803  if (isSignTest(Pred, C) && Mul->hasNoSignedWrap()) {
1804  if (MulC->isNegative())
1805  Pred = ICmpInst::getSwappedPredicate(Pred);
1806  return new ICmpInst(Pred, Mul->getOperand(0),
1807  Constant::getNullValue(Mul->getType()));
1808  }
1809 
1810  return nullptr;
1811 }
1812 
1813 /// Fold icmp (shl 1, Y), C.
1814 static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
1815  const APInt &C) {
1816  Value *Y;
1817  if (!match(Shl, m_Shl(m_One(), m_Value(Y))))
1818  return nullptr;
1819 
1820  Type *ShiftType = Shl->getType();
1821  unsigned TypeBits = C.getBitWidth();
1822  bool CIsPowerOf2 = C.isPowerOf2();
1823  ICmpInst::Predicate Pred = Cmp.getPredicate();
1824  if (Cmp.isUnsigned()) {
1825  // (1 << Y) pred C -> Y pred Log2(C)
1826  if (!CIsPowerOf2) {
1827  // (1 << Y) < 30 -> Y <= 4
1828  // (1 << Y) <= 30 -> Y <= 4
1829  // (1 << Y) >= 30 -> Y > 4
1830  // (1 << Y) > 30 -> Y > 4
1831  if (Pred == ICmpInst::ICMP_ULT)
1832  Pred = ICmpInst::ICMP_ULE;
1833  else if (Pred == ICmpInst::ICMP_UGE)
1834  Pred = ICmpInst::ICMP_UGT;
1835  }
1836 
1837  // (1 << Y) >= 2147483648 -> Y >= 31 -> Y == 31
1838  // (1 << Y) < 2147483648 -> Y < 31 -> Y != 31
1839  unsigned CLog2 = C.logBase2();
1840  if (CLog2 == TypeBits - 1) {
1841  if (Pred == ICmpInst::ICMP_UGE)
1842  Pred = ICmpInst::ICMP_EQ;
1843  else if (Pred == ICmpInst::ICMP_ULT)
1844  Pred = ICmpInst::ICMP_NE;
1845  }
1846  return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
1847  } else if (Cmp.isSigned()) {
1848  Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
1849  if (C.isAllOnesValue()) {
1850  // (1 << Y) <= -1 -> Y == 31
1851  if (Pred == ICmpInst::ICMP_SLE)
1852  return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
1853 
1854  // (1 << Y) > -1 -> Y != 31
1855  if (Pred == ICmpInst::ICMP_SGT)
1856  return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
1857  } else if (!C) {
1858  // (1 << Y) < 0 -> Y == 31
1859  // (1 << Y) <= 0 -> Y == 31
1860  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
1861  return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
1862 
1863  // (1 << Y) >= 0 -> Y != 31
1864  // (1 << Y) > 0 -> Y != 31
1865  if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
1866  return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
1867  }
1868  } else if (Cmp.isEquality() && CIsPowerOf2) {
1869  return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, C.logBase2()));
1870  }
1871 
1872  return nullptr;
1873 }
1874 
1875 /// Fold icmp (shl X, Y), C.
1876 Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
1877  BinaryOperator *Shl,
1878  const APInt &C) {
1879  const APInt *ShiftVal;
1880  if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal)))
1881  return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal);
1882 
1883  const APInt *ShiftAmt;
1884  if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
1885  return foldICmpShlOne(Cmp, Shl, C);
1886 
1887  // Check that the shift amount is in range. If not, don't perform undefined
1888  // shifts. When the shift is visited, it will be simplified.
1889  unsigned TypeBits = C.getBitWidth();
1890  if (ShiftAmt->uge(TypeBits))
1891  return nullptr;
1892 
1893  ICmpInst::Predicate Pred = Cmp.getPredicate();
1894  Value *X = Shl->getOperand(0);
1895  Type *ShType = Shl->getType();
1896 
1897  // NSW guarantees that we are only shifting out sign bits from the high bits,
1898  // so we can ASHR the compare constant without needing a mask and eliminate
1899  // the shift.
1900  if (Shl->hasNoSignedWrap()) {
1901  if (Pred == ICmpInst::ICMP_SGT) {
1902  // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
1903  APInt ShiftedC = C.ashr(*ShiftAmt);
1904  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1905  }
1906  if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
1907  C.ashr(*ShiftAmt).shl(*ShiftAmt) == C) {
1908  APInt ShiftedC = C.ashr(*ShiftAmt);
1909  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1910  }
1911  if (Pred == ICmpInst::ICMP_SLT) {
1912  // SLE is the same as above, but SLE is canonicalized to SLT, so convert:
1913  // (X << S) <=s C is equiv to X <=s (C >> S) for all C
1914  // (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX
1915  // (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN
1916  assert(!C.isMinSignedValue() && "Unexpected icmp slt");
1917  APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1;
1918  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1919  }
1920  // If this is a signed comparison to 0 and the shift is sign preserving,
1921  // use the shift LHS operand instead; isSignTest may change 'Pred', so only
1922  // do that if we're sure to not continue on in this function.
1923  if (isSignTest(Pred, C))
1924  return new ICmpInst(Pred, X, Constant::getNullValue(ShType));
1925  }
1926 
1927  // NUW guarantees that we are only shifting out zero bits from the high bits,
1928  // so we can LSHR the compare constant without needing a mask and eliminate
1929  // the shift.
1930  if (Shl->hasNoUnsignedWrap()) {
1931  if (Pred == ICmpInst::ICMP_UGT) {
1932  // icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt)
1933  APInt ShiftedC = C.lshr(*ShiftAmt);
1934  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1935  }
1936  if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
1937  C.lshr(*ShiftAmt).shl(*ShiftAmt) == C) {
1938  APInt ShiftedC = C.lshr(*ShiftAmt);
1939  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1940  }
1941  if (Pred == ICmpInst::ICMP_ULT) {
1942  // ULE is the same as above, but ULE is canonicalized to ULT, so convert:
1943  // (X << S) <=u C is equiv to X <=u (C >> S) for all C
1944  // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u
1945  // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0
1946  assert(C.ugt(0) && "ult 0 should have been eliminated");
1947  APInt ShiftedC = (C - 1).lshr(*ShiftAmt) + 1;
1948  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1949  }
1950  }
1951 
1952  if (Cmp.isEquality() && Shl->hasOneUse()) {
1953  // Strength-reduce the shift into an 'and'.
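  // For example: icmp eq (shl i32 %x, 3), 40
  //   --> icmp eq (and i32 %x, 536870911), 5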
1954  Constant *Mask = ConstantInt::get(
1955  ShType,
1956  APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
1957  Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
1958  Constant *LShrC = ConstantInt::get(ShType, C.lshr(*ShiftAmt));
1959  return new ICmpInst(Pred, And, LShrC);
1960  }
1961 
1962  // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
1963  bool TrueIfSigned = false;
1964  if (Shl->hasOneUse() && isSignBitCheck(Pred, C, TrueIfSigned)) {
1965  // (X << 31) <s 0 --> (X & 1) != 0
1966  Constant *Mask = ConstantInt::get(
1967  ShType,
1968  APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1));
1969  Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
1970  return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
1971  And, Constant::getNullValue(ShType));
1972  }
1973 
1974  // Transform (icmp pred iM (shl iM %v, N), C)
1975  // -> (icmp pred i(M-N) (trunc iM %v to i(M-N)), (trunc (C>>N)))
1976  // Do so only if (trunc (C>>N)) is lossless and M-N is a legal integer width.
1977  // This enables us to get rid of the shift in favor of a trunc that may be
1978  // free on the target. It has the additional benefit of comparing to a
1979  // smaller constant that may be more target-friendly.
1980  unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
1981  if (Shl->hasOneUse() && Amt != 0 && C.countTrailingZeros() >= Amt &&
1982  DL.isLegalInteger(TypeBits - Amt)) {
1983  Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
1984  if (ShType->isVectorTy())
1985  TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements());
1986  Constant *NewC =
1987  ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt));
1988  return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC);
1989  }
1990 
1991  return nullptr;
1992 }
1993 
1994 /// Fold icmp ({al}shr X, Y), C.
1995 Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
1996  BinaryOperator *Shr,
1997  const APInt &C) {
1998  // An exact shr only shifts out zero bits, so:
1999  // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
2000  Value *X = Shr->getOperand(0);
2001  CmpInst::Predicate Pred = Cmp.getPredicate();
2002  if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() &&
2003  C.isNullValue())
2004  return new ICmpInst(Pred, X, Cmp.getOperand(1));
2005 
2006  const APInt *ShiftVal;
2007  if (Cmp.isEquality() && match(Shr->getOperand(0), m_APInt(ShiftVal)))
2008  return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftVal);
2009 
2010  const APInt *ShiftAmt;
2011  if (!match(Shr->getOperand(1), m_APInt(ShiftAmt)))
2012  return nullptr;
2013 
2014  // Check that the shift amount is in range. If not, don't perform undefined
2015  // shifts. When the shift is visited it will be simplified.
2016  unsigned TypeBits = C.getBitWidth();
2017  unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits);
2018  if (ShAmtVal >= TypeBits || ShAmtVal == 0)
2019  return nullptr;
2020 
2021  bool IsAShr = Shr->getOpcode() == Instruction::AShr;
2022  bool IsExact = Shr->isExact();
2023  Type *ShrTy = Shr->getType();
2024  // TODO: If we could guarantee that InstSimplify would handle all of the
2025  // constant-value-based preconditions in the folds below, then we could assert
2026  // those conditions rather than checking them. This is difficult because of
2027  // undef/poison (PR34838).
2028  if (IsAShr) {
2029  if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) {
2030  // icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC)
2031  // icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC)
2032  APInt ShiftedC = C.shl(ShAmtVal);
2033  if (ShiftedC.ashr(ShAmtVal) == C)
2034  return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2035  }
2036  if (Pred == CmpInst::ICMP_SGT) {
2037  // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1
2038  APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2039  if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() &&
2040  (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
2041  return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2042  }
2043  } else {
2044  if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
2045  // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
2046  // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
2047  APInt ShiftedC = C.shl(ShAmtVal);
2048  if (ShiftedC.lshr(ShAmtVal) == C)
2049  return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2050  }
2051  if (Pred == CmpInst::ICMP_UGT) {
2052  // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
2053  APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2054  if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1))
2055  return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2056  }
2057  }
2058 
2059  if (!Cmp.isEquality())
2060  return nullptr;
2061 
2062  // Handle equality comparisons of shift-by-constant.
2063 
2064  // If the comparison constant changes with the shift, the comparison cannot
2065  // succeed (bits of the comparison constant cannot match the shifted value).
2066  // This should be known by InstSimplify and already be folded to true/false.
2067  assert(((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) ||
2068  (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) &&
2069  "Expected icmp+shr simplify did not occur.");
2070 
2071  // If the bits shifted out are known zero, compare the unshifted value:
2072  // (X & 4) >> 1 == 2 --> (X & 4) == 4.
2073  if (Shr->isExact())
2074  return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
2075 
2076  if (Shr->hasOneUse()) {
2077  // Canonicalize the shift into an 'and':
2078  // icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt)
2079  APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
2080  Constant *Mask = ConstantInt::get(ShrTy, Val);
2081  Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
2082  return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal));
2083  }
2084 
2085  return nullptr;
2086 }
2087 
2088 /// Fold icmp (udiv X, Y), C.
2089 Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp,
2090  BinaryOperator *UDiv,
2091  const APInt &C) {
2092  const APInt *C2;
2093  if (!match(UDiv->getOperand(0), m_APInt(C2)))
2094  return nullptr;
2095 
2096  assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
2097 
2098  // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
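  // For example: icmp ugt (udiv i32 100, %y), 9 --> icmp ule i32 %y, 10
  // (udiv by zero is undefined, so %y == 0 need not be accounted for).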
2099  Value *Y = UDiv->getOperand(1);
2100  if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) {
2101  assert(!C.isMaxValue() &&
2102  "icmp ugt X, UINT_MAX should have been simplified already.");
2103  return new ICmpInst(ICmpInst::ICMP_ULE, Y,
2104  ConstantInt::get(Y->getType(), C2->udiv(C + 1)));
2105  }
2106 
2107  // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
2108  if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) {
2109  assert(C != 0 && "icmp ult X, 0 should have been simplified already.");
2110  return new ICmpInst(ICmpInst::ICMP_UGT, Y,
2111  ConstantInt::get(Y->getType(), C2->udiv(C)));
2112  }
2113 
2114  return nullptr;
2115 }
2116 
2117 /// Fold icmp ({su}div X, Y), C.
2118 Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
2119  BinaryOperator *Div,
2120  const APInt &C) {
2121  // Fold: icmp pred ([us]div X, C2), C -> range test
2122  // Fold this div into the comparison, producing a range check.
2123  // Determine, based on the divide type, what the range is being
2124  // checked. If there is an overflow on the low or high side, remember
2125  // it, otherwise compute the range [low, hi) bounding the new value.
2126  // See: InsertRangeTest above for the kinds of replacements possible.
2127  const APInt *C2;
2128  if (!match(Div->getOperand(1), m_APInt(C2)))
2129  return nullptr;
2130 
2131  // FIXME: If the operand types don't match the type of the divide
2132  // then don't attempt this transform. The code below doesn't have the
2133  // logic to deal with a signed divide and an unsigned compare (and
2134  // vice versa). This is because (x /s C2) <s C produces different
2135  // results than (x /s C2) <u C or (x /u C2) <s C or even
2136  // (x /u C2) <u C. Simply casting the operands and result won't
2137  // work. :( The if statement below tests that condition and bails
2138  // if it finds it.
2139  bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
2140  if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned())
2141  return nullptr;
2142 
2143  // The ProdOV computation fails on divide by 0 and divide by -1. Cases with
2144  // INT_MIN will also fail if the divisor is 1. Although folds of all these
2145  // division-by-constant cases should be present, we cannot assert that they
2146  // have happened before we reach this icmp instruction.
2147  if (C2->isNullValue() || C2->isOneValue() ||
2148  (DivIsSigned && C2->isAllOnesValue()))
2149  return nullptr;
2150 
2151  // Compute Prod = C * C2. We are essentially solving an equation of
2152  // form X / C2 = C. We solve for X by multiplying C2 and C.
2153  // By solving for X, we can turn this into a range check instead of computing
2154  // a divide.
2155  APInt Prod = C * *C2;
2156 
2157  // Determine if the product overflows by seeing if the product is not equal to
2158  // the divide. Make sure we do the same kind of divide as in the LHS
2159  // instruction that we're folding.
2160  bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C;
2161 
2162  ICmpInst::Predicate Pred = Cmp.getPredicate();
2163 
2164  // If the division is known to be exact, then there is no remainder from the
2165  // divide, so the covered range size is 1; otherwise it is the divisor.
2166  APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2;
2167 
2168  // Figure out the interval that is being checked. For example, a comparison
2169  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
2170  // Compute this interval based on the constants involved and the signedness of
2171  // the compare/divide. This computes a half-open interval, keeping track of
2172  // whether either value in the interval overflows. After analysis, each
2173  // overflow variable is set to 0 if its corresponding bound variable is valid,
2174  // -1 if it overflowed off the bottom end, or +1 if it overflowed off the top end.
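  // For example, "X /u 5 == 3" checks X in [15, 20), which insertRangeTest
  // emits roughly as (X + -15) u< 5; "X /s -5 == 3" checks X in [-19, -14).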
2175  int LoOverflow = 0, HiOverflow = 0;
2176  APInt LoBound, HiBound;
2177 
2178  if (!DivIsSigned) { // udiv
2179  // e.g. X/5 op 3 --> [15, 20)
2180  LoBound = Prod;
2181  HiOverflow = LoOverflow = ProdOV;
2182  if (!HiOverflow) {
2183  // If this is not an exact divide, then many values in the range collapse
2184  // to the same result value.
2185  HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
2186  }
2187  } else if (C2->isStrictlyPositive()) { // Divisor is > 0.
2188  if (C.isNullValue()) { // (X / pos) op 0
2189  // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
2190  LoBound = -(RangeSize - 1);
2191  HiBound = RangeSize;
2192  } else if (C.isStrictlyPositive()) { // (X / pos) op pos
2193  LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
2194  HiOverflow = LoOverflow = ProdOV;
2195  if (!HiOverflow)
2196  HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
2197  } else { // (X / pos) op neg
2198  // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
2199  HiBound = Prod + 1;
2200  LoOverflow = HiOverflow = ProdOV ? -1 : 0;
2201  if (!LoOverflow) {
2202  APInt DivNeg = -RangeSize;
2203  LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
2204  }
2205  }
2206  } else if (C2->isNegative()) { // Divisor is < 0.
2207  if (Div->isExact())
2208  RangeSize.negate();
2209  if (C.isNullValue()) { // (X / neg) op 0
2210  // e.g. X/-5 op 0 --> [-4, 5)
2211  LoBound = RangeSize + 1;
2212  HiBound = -RangeSize;
2213  if (HiBound == *C2) { // -INTMIN = INTMIN
2214  HiOverflow = 1; // [INTMIN+1, overflow)
2215  HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN
2216  }
2217  } else if (C.isStrictlyPositive()) { // (X / neg) op pos
2218  // e.g. X/-5 op 3 --> [-19, -14)
2219  HiBound = Prod + 1;
2220  HiOverflow = LoOverflow = ProdOV ? -1 : 0;
2221  if (!LoOverflow)
2222  LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
2223  } else { // (X / neg) op neg
2224  LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
2225  LoOverflow = HiOverflow = ProdOV;
2226  if (!HiOverflow)
2227  HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true);
2228  }
2229 
2230  // Dividing by a negative swaps the condition. LT <-> GT
2231  Pred = ICmpInst::getSwappedPredicate(Pred);
2232  }
2233 
2234  Value *X = Div->getOperand(0);
2235  switch (Pred) {
2236  default: llvm_unreachable("Unhandled icmp opcode!");
2237  case ICmpInst::ICMP_EQ:
2238  if (LoOverflow && HiOverflow)
2239  return replaceInstUsesWith(Cmp, Builder.getFalse());
2240  if (HiOverflow)
2241  return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
2242  ICmpInst::ICMP_UGE, X,
2243  ConstantInt::get(Div->getType(), LoBound));
2244  if (LoOverflow)
2245  return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
2246  ICmpInst::ICMP_ULT, X,
2247  ConstantInt::get(Div->getType(), HiBound));
2248  return replaceInstUsesWith(
2249  Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true));
2250  case ICmpInst::ICMP_NE:
2251  if (LoOverflow && HiOverflow)
2252  return replaceInstUsesWith(Cmp, Builder.getTrue());
2253  if (HiOverflow)
2254  return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
2255  ICmpInst::ICMP_ULT, X,
2256  ConstantInt::get(Div->getType(), LoBound));
2257  if (LoOverflow)
2258  return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
2259  ICmpInst::ICMP_UGE, X,
2260  ConstantInt::get(Div->getType(), HiBound));
2261  return replaceInstUsesWith(Cmp,
2262  insertRangeTest(X, LoBound, HiBound,
2263  DivIsSigned, false));
2264  case ICmpInst::ICMP_ULT:
2265  case ICmpInst::ICMP_SLT:
2266  if (LoOverflow == +1) // Low bound is greater than input range.
2267  return replaceInstUsesWith(Cmp, Builder.getTrue());
2268  if (LoOverflow == -1) // Low bound is less than input range.
2269  return replaceInstUsesWith(Cmp, Builder.getFalse());
2270  return new ICmpInst(Pred, X, ConstantInt::get(Div->getType(), LoBound));
2271  case ICmpInst::ICMP_UGT:
2272  case ICmpInst::ICMP_SGT:
2273  if (HiOverflow == +1) // High bound greater than input range.
2274  return replaceInstUsesWith(Cmp, Builder.getFalse());
2275  if (HiOverflow == -1) // High bound less than input range.
2276  return replaceInstUsesWith(Cmp, Builder.getTrue());
2277  if (Pred == ICmpInst::ICMP_UGT)
2278  return new ICmpInst(ICmpInst::ICMP_UGE, X,
2279  ConstantInt::get(Div->getType(), HiBound));
2280  return new ICmpInst(ICmpInst::ICMP_SGE, X,
2281  ConstantInt::get(Div->getType(), HiBound));
2282  }
2283 
2284  return nullptr;
2285 }
2286 
2287 /// Fold icmp (sub X, Y), C.
2288 Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
2289  BinaryOperator *Sub,
2290  const APInt &C) {
2291  Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
2292  ICmpInst::Predicate Pred = Cmp.getPredicate();
2293 
2294  // The following transforms are only worth it if the only user of the subtract
2295  // is the icmp.
2296  if (!Sub->hasOneUse())
2297  return nullptr;
2298 
2299  if (Sub->hasNoSignedWrap()) {
2300  // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
2301  if (Pred == ICmpInst::ICMP_SGT && C.isAllOnesValue())
2302  return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
2303 
2304  // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y)
2305  if (Pred == ICmpInst::ICMP_SGT && C.isNullValue())
2306  return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
2307 
2308  // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y)
2309  if (Pred == ICmpInst::ICMP_SLT && C.isNullValue())
2310  return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
2311 
2312  // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y)
2313  if (Pred == ICmpInst::ICMP_SLT && C.isOneValue())
2314  return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
2315  }
2316 
2317  const APInt *C2;
2318  if (!match(X, m_APInt(C2)))
2319  return nullptr;
2320 
2321  // C2 - Y <u C -> (Y | (C - 1)) == C2
2322  // iff (C2 & (C - 1)) == C - 1 and C is a power of 2
2323  if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() &&
2324  (*C2 & (C - 1)) == (C - 1))
2325  return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, C - 1), X);
2326 
2327  // C2 - Y >u C -> (Y | C) != C2
2328  // iff C2 & C == C and C + 1 is a power of 2
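  // For example, with C2 = 15 and C = 3 (15 & 3 == 3, C + 1 = 4):
  //   (15 - Y) u> 3 --> (Y | 3) != 15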
2329  if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C)
2330  return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X);
2331 
2332  return nullptr;
2333 }
2334 
2335 /// Fold icmp (add X, Y), C.
2336 Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
2337  BinaryOperator *Add,
2338  const APInt &C) {
2339  Value *Y = Add->getOperand(1);
2340  const APInt *C2;
2341  if (Cmp.isEquality() || !match(Y, m_APInt(C2)))
2342  return nullptr;
2343 
2344  // Fold icmp pred (add X, C2), C.
2345  Value *X = Add->getOperand(0);
2346  Type *Ty = Add->getType();
2347  CmpInst::Predicate Pred = Cmp.getPredicate();
2348 
2349  if (!Add->hasOneUse())
2350  return nullptr;
2351 
2352  // If the add does not wrap, we can always adjust the compare by subtracting
2353  // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
2354  // are canonicalized to SGT/SLT/UGT/ULT.
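  // For example: icmp sgt (add nsw i32 %x, 5), 7 --> icmp sgt i32 %x, 2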
2355  if ((Add->hasNoSignedWrap() &&
2356  (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) ||
2357  (Add->hasNoUnsignedWrap() &&
2358  (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) {
2359  bool Overflow;
2360  APInt NewC =
2361  Cmp.isSigned() ? C.ssub_ov(*C2, Overflow) : C.usub_ov(*C2, Overflow);
2362  // If there is overflow, the result must be true or false.
2363  // TODO: Can we assert there is no overflow because InstSimplify always
2364  // handles those cases?
2365  if (!Overflow)
2366  // icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2)
2367  return new ICmpInst(Pred, X, ConstantInt::get(Ty, NewC));
2368  }
2369 
2370  auto CR = ConstantRange::makeExactICmpRegion(Pred, C).subtract(*C2);
2371  const APInt &Upper = CR.getUpper();
2372  const APInt &Lower = CR.getLower();
2373  if (Cmp.isSigned()) {
2374  if (Lower.isSignMask())
2375  return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper));
2376  if (Upper.isSignMask())
2377  return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower));
2378  } else {
2379  if (Lower.isMinValue())
2380  return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, Upper));
2381  if (Upper.isMinValue())
2382  return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
2383  }
2384 
2385  // X+C <u C2 -> (X & -C2) == C
2386  // iff C & (C2-1) == 0
2387  // C2 is a power of 2
2388  if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && (*C2 & (C - 1)) == 0)
2389  return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -C),
2390  ConstantExpr::getNeg(cast<Constant>(Y)));
2391 
2392  // X+C >u C2 -> (X & ~C2) != C
2393  // iff C & C2 == 0
2394  // C2+1 is a power of 2
2395  if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == 0)
2396  return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C),
2397  ConstantExpr::getNeg(cast<Constant>(Y)));
2398 
2399  return nullptr;
2400 }
2401 
2402 bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
2403  Value *&RHS, ConstantInt *&Less,
2404  ConstantInt *&Equal,
2405  ConstantInt *&Greater) {
2406  // TODO: Generalize this to work with other comparison idioms or ensure
2407  // they get canonicalized into this form.
2408 
2409  // select i1 (a == b), i32 Equal, i32 (select i1 (a < b), i32 Less, i32
2410  // Greater), where Equal, Less and Greater are placeholders for any three
2411  // constants.
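  // That is, a chain of the form:
  //   %cmp1 = icmp eq i32 %a, %b
  //   %cmp2 = icmp slt i32 %a, %b
  //   %sel1 = select i1 %cmp2, i32 Less, i32 Greater
  //   %sel2 = select i1 %cmp1, i32 Equal, i32 %sel1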
2412  ICmpInst::Predicate PredA, PredB;
2413  if (match(SI->getTrueValue(), m_ConstantInt(Equal)) &&
2414  match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) &&
2415  PredA == ICmpInst::ICMP_EQ &&
2416  match(SI->getFalseValue(),
2417  m_Select(m_ICmp(PredB, m_Specific(LHS), m_Specific(RHS)),
2418  m_ConstantInt(Less), m_ConstantInt(Greater))) &&
2419  PredB == ICmpInst::ICMP_SLT) {
2420  return true;
2421  }
2422  return false;
2423 }
2424 
2425 Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
2426  SelectInst *Select,
2427  ConstantInt *C) {
2428 
2429  assert(C && "Cmp RHS should be a constant int!");
2430  // If we're testing a constant value against the result of a three way
2431  // comparison, the result can be expressed directly in terms of the
2432  // original values being compared. Note: We could possibly be more
2433  // aggressive here and remove the hasOneUse test. The original select is
2434  // really likely to simplify or sink when we remove a test of the result.
2435  Value *OrigLHS, *OrigRHS;
2436  ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan;
2437  if (Cmp.hasOneUse() &&
2438  matchThreeWayIntCompare(Select, OrigLHS, OrigRHS, C1LessThan, C2Equal,
2439  C3GreaterThan)) {
2440  assert(C1LessThan && C2Equal && C3GreaterThan);
2441 
2442  bool TrueWhenLessThan =
2443  ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C)
2444  ->isAllOnesValue();
2445  bool TrueWhenEqual =
2446  ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C)
2447  ->isAllOnesValue();
2448  bool TrueWhenGreaterThan =
2449  ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C)
2450  ->isAllOnesValue();
2451 
2452  // This generates the new instruction that will replace the original Cmp
2453  // Instruction. Instead of enumerating the various combinations when
2454  // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus
2455  // false, we rely on chaining of ORs and future passes of InstCombine to
2456  // simplify the OR further (i.e. a s< b || a == b becomes a s<= b).
2457 
2458  // When none of the three constants satisfy the predicate for the RHS (C),
2459  // the entire original Cmp can be simplified to false.
2460  Value *Cond = Builder.getFalse();
2461  if (TrueWhenLessThan)
2462  Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
2463  if (TrueWhenEqual)
2464  Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
2465  if (TrueWhenGreaterThan)
2466  Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));
2467 
2468  return replaceInstUsesWith(Cmp, Cond);
2469  }
2470  return nullptr;
2471 }
2472 
2473 Instruction *InstCombiner::foldICmpBitCastConstant(ICmpInst &Cmp,
2474  BitCastInst *Bitcast,
2475  const APInt &C) {
2476  // Folding: icmp <pred> iN X, C
2477  // where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
2478  // and C is a splat of a K-bit pattern
2479  // and SC is a constant vector = <C', C', C', ..., C'>
2480  // Into:
2481  // %E = extractelement <M x iK> %vec, i32 C'
2482  // icmp <pred> iK %E, trunc(C)
2483  if (!Bitcast->getType()->isIntegerTy() ||
2484  !Bitcast->getSrcTy()->isIntOrIntVectorTy())
2485  return nullptr;
2486 
2487  Value *BCIOp = Bitcast->getOperand(0);
2488  Value *Vec = nullptr; // 1st vector arg of the shufflevector
2489  Constant *Mask = nullptr; // Mask arg of the shufflevector
2490  if (match(BCIOp,
2491  m_ShuffleVector(m_Value(Vec), m_Undef(), m_Constant(Mask)))) {
2492  // Check whether every element of Mask is the same constant
2493  if (auto *Elem = dyn_cast_or_null<ConstantInt>(Mask->getSplatValue())) {
2494  auto *VecTy = cast<VectorType>(BCIOp->getType());
2495  auto *EltTy = cast<IntegerType>(VecTy->getElementType());
2496  auto Pred = Cmp.getPredicate();
2497  if (C.isSplat(EltTy->getBitWidth())) {
2498  // Fold the icmp based on the value of C
2499  // If C is M copies of an iK sized bit pattern,
2500  // then:
2501  // => %E = extractelement <M x iK> %vec, i32 Elem
2502  // icmp <pred> iK %SplatVal, <pattern>
2503  Value *Extract = Builder.CreateExtractElement(Vec, Elem);
2504  Value *NewC = ConstantInt::get(EltTy, C.trunc(EltTy->getBitWidth()));
2505  return new ICmpInst(Pred, Extract, NewC);
2506  }
2507  }
2508  }
2509  return nullptr;
2510 }
2511 
2512 /// Try to fold integer comparisons with a constant operand: icmp Pred X, C
2513 /// where X is some kind of instruction.
2514 Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
2515  const APInt *C;
2516  if (!match(Cmp.getOperand(1), m_APInt(C)))
2517  return nullptr;
2518 
2519  if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) {
2520  switch (BO->getOpcode()) {
2521  case Instruction::Xor:
2522  if (Instruction *I = foldICmpXorConstant(Cmp, BO, *C))
2523  return I;
2524  break;
2525  case Instruction::And:
2526  if (Instruction *I = foldICmpAndConstant(Cmp, BO, *C))
2527  return I;
2528  break;
2529  case Instruction::Or:
2530  if (Instruction *I = foldICmpOrConstant(Cmp, BO, *C))
2531  return I;
2532  break;
2533  case Instruction::Mul:
2534  if (Instruction *I = foldICmpMulConstant(Cmp, BO, *C))
2535  return I;
2536  break;
2537  case Instruction::Shl:
2538  if (Instruction *I = foldICmpShlConstant(Cmp, BO, *C))
2539  return I;
2540  break;
2541  case Instruction::LShr:
2542  case Instruction::AShr:
2543  if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C))
2544  return I;
2545  break;
2546  case Instruction::UDiv:
2547  if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C))
2548  return I;
2549  LLVM_FALLTHROUGH;
2550  case Instruction::SDiv:
2551  if (Instruction *I = foldICmpDivConstant(Cmp, BO, *C))
2552  return I;
2553  break;
2554  case Instruction::Sub:
2555  if (Instruction *I = foldICmpSubConstant(Cmp, BO, *C))
2556  return I;
2557  break;
2558  case Instruction::Add:
2559  if (Instruction *I = foldICmpAddConstant(Cmp, BO, *C))
2560  return I;
2561  break;
2562  default:
2563  break;
2564  }
2565  // TODO: These folds could be refactored to be part of the above calls.
2566  if (Instruction *I = foldICmpBinOpEqualityWithConstant(Cmp, BO, *C))
2567  return I;
2568  }
2569 
2570  // Match against CmpInst LHS being instructions other than binary operators.
2571 
2572  if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0))) {
2573  // For now, we only support constant integers while folding the
2574  // ICMP(SELECT)) pattern. We can extend this to support vector of integers
2575  // similar to the cases handled by binary ops above.
2576  if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
2577  if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS))
2578  return I;
2579  }
2580 
2581  if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) {
2582  if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C))
2583  return I;
2584  }
2585 
2586  if (auto *BCI = dyn_cast<BitCastInst>(Cmp.getOperand(0))) {
2587  if (Instruction *I = foldICmpBitCastConstant(Cmp, BCI, *C))
2588  return I;
2589  }
2590 
2591  if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
2592  return I;
2593 
2594  return nullptr;
2595 }
2596 
2597 /// Fold an icmp equality instruction with binary operator LHS and constant RHS:
2598 /// icmp eq/ne BO, C.
2599 Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
2600  BinaryOperator *BO,
2601  const APInt &C) {
2602  // TODO: Some of these folds could work with arbitrary constants, but this
2603  // function is limited to scalar and vector splat constants.
2604  if (!Cmp.isEquality())
2605  return nullptr;
2606 
2607  ICmpInst::Predicate Pred = Cmp.getPredicate();
2608  bool isICMP_NE = Pred == ICmpInst::ICMP_NE;
2609  Constant *RHS = cast<Constant>(Cmp.getOperand(1));
2610  Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
2611 
2612  switch (BO->getOpcode()) {
2613  case Instruction::SRem:
2614  // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
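  // For example: icmp eq (srem i32 %x, 8), 0 --> icmp eq (urem i32 %x, 8), 0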
2615  if (C.isNullValue() && BO->hasOneUse()) {
2616  const APInt *BOC;
2617  if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
2618  Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName());
2619  return new ICmpInst(Pred, NewRem,
2620  Constant::getNullValue(BO->getType()));
2621  }
2622  }
2623  break;
2624  case Instruction::Add: {
2625  // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
2626  const APInt *BOC;
2627  if (match(BOp1, m_APInt(BOC))) {
2628  if (BO->hasOneUse()) {
2629  Constant *SubC = ConstantExpr::getSub(RHS, cast<Constant>(BOp1));
2630  return new ICmpInst(Pred, BOp0, SubC);
2631  }
2632  } else if (C.isNullValue()) {
2633  // Replace ((add A, B) != 0) with (A != -B) if A or B is
2634  // efficiently invertible, or if the add has just this one use.
2635  if (Value *NegVal = dyn_castNegVal(BOp1))
2636  return new ICmpInst(Pred, BOp0, NegVal);
2637  if (Value *NegVal = dyn_castNegVal(BOp0))
2638  return new ICmpInst(Pred, NegVal, BOp1);
2639  if (BO->hasOneUse()) {
2640  Value *Neg = Builder.CreateNeg(BOp1);
2641  Neg->takeName(BO);
2642  return new ICmpInst(Pred, BOp0, Neg);
2643  }
2644  }
2645  break;
2646  }
2647  case Instruction::Xor:
2648  if (BO->hasOneUse()) {
2649  if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
2650  // For the xor case, we can xor two constants together, eliminating
2651  // the explicit xor.
2652  return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
2653  } else if (C.isNullValue()) {
2654  // Replace ((xor A, B) != 0) with (A != B)
2655  return new ICmpInst(Pred, BOp0, BOp1);
2656  }
2657  }
2658  break;
2659  case Instruction::Sub:
2660  if (BO->hasOneUse()) {
2661  const APInt *BOC;
2662  if (match(BOp0, m_APInt(BOC))) {
2663  // Replace ((sub BOC, B) != C) with (B != BOC-C).
2664  Constant *SubC = ConstantExpr::getSub(cast<Constant>(BOp0), RHS);
2665  return new ICmpInst(Pred, BOp1, SubC);
2666  } else if (C.isNullValue()) {
2667  // Replace ((sub A, B) != 0) with (A != B).
2668  return new ICmpInst(Pred, BOp0, BOp1);
2669  }
2670  }
2671  break;
2672  case Instruction::Or: {
2673  const APInt *BOC;
2674  if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) {
2675  // Comparing if all bits outside of a constant mask are set?
2676  // Replace (X | C) == -1 with (X & ~C) == ~C.
2677  // This removes the -1 constant.
2678  Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1));
2679  Value *And = Builder.CreateAnd(BOp0, NotBOC);
2680  return new ICmpInst(Pred, And, NotBOC);
2681  }
2682  break;
2683  }
2684  case Instruction::And: {
2685  const APInt *BOC;
2686  if (match(BOp1, m_APInt(BOC))) {
2687  // If we have ((X & C) == C), turn it into ((X & C) != 0).
2688  if (C == *BOC && C.isPowerOf2())
2689  return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
2690  BO, Constant::getNullValue(RHS->getType()));
2691 
2692  // Don't perform the following transforms if the AND has multiple uses
2693  if (!BO->hasOneUse())
2694  break;
2695 
2696  // Replace ((and X, (1 << size(X)-1)) != 0) with X s< 0
2697  if (BOC->isSignMask()) {
2698  Constant *Zero = Constant::getNullValue(BOp0->getType());
2699  auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
2700  return new ICmpInst(NewPred, BOp0, Zero);
2701  }
2702 
2703  // ((X & ~7) == 0) --> X < 8
2704  if (C.isNullValue() && (~(*BOC) + 1).isPowerOf2()) {
2705  Constant *NegBOC = ConstantExpr::getNeg(cast<Constant>(BOp1));
2706  auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
2707  return new ICmpInst(NewPred, BOp0, NegBOC);
2708  }
2709  }
2710  break;
2711  }
2712  case Instruction::Mul:
2713  if (C.isNullValue() && BO->hasNoSignedWrap()) {
2714  const APInt *BOC;
2715  if (match(BOp1, m_APInt(BOC)) && !BOC->isNullValue()) {
2716  // The trivial case (mul X, 0) is handled by InstSimplify.
2717  // General case : (mul X, C) != 0 iff X != 0
2718  // (mul X, C) == 0 iff X == 0
2719  return new ICmpInst(Pred, BOp0, Constant::getNullValue(RHS->getType()));
2720  }
2721  }
2722  break;
2723  case Instruction::UDiv:
2724  if (C.isNullValue()) {
2725  // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
2726  auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
2727  return new ICmpInst(NewPred, BOp1, BOp0);
2728  }
2729  break;
2730  default:
2731  break;
2732  }
2733  return nullptr;
2734 }
2735 
2736 /// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
2737 Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
2738  const APInt &C) {
2739  IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
2740  if (!II || !Cmp.isEquality())
2741  return nullptr;
2742 
2743  // Handle icmp {eq|ne} <intrinsic>, Constant.
2744  Type *Ty = II->getType();
2745  switch (II->getIntrinsicID()) {
2746  case Intrinsic::bswap:
2747  Worklist.Add(II);
2748  Cmp.setOperand(0, II->getArgOperand(0));
2749  Cmp.setOperand(1, ConstantInt::get(Ty, C.byteSwap()));
2750  return &Cmp;
2751 
2752  case Intrinsic::ctlz:
2753  case Intrinsic::cttz:
2754  // ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
2755  if (C == C.getBitWidth()) {
2756  Worklist.Add(II);
2757  Cmp.setOperand(0, II->getArgOperand(0));
2758  Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
2759  return &Cmp;
2760  }
2761  break;
2762 
2763  case Intrinsic::ctpop: {
2764  // popcount(A) == 0 -> A == 0 and likewise for !=
2765  // popcount(A) == bitwidth(A) -> A == -1 and likewise for !=
2766  bool IsZero = C.isNullValue();
2767  if (IsZero || C == C.getBitWidth()) {
2768  Worklist.Add(II);
2769  Cmp.setOperand(0, II->getArgOperand(0));
2770  auto *NewOp =
2771  IsZero ? Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty);
2772  Cmp.setOperand(1, NewOp);
2773  return &Cmp;
2774  }
2775  break;
2776  }
2777  default:
2778  break;
2779  }
2780 
2781  return nullptr;
2782 }
2783 
2784 /// Handle icmp with constant (but not simple integer constant) RHS.
2785 Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
2786  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2787  Constant *RHSC = dyn_cast<Constant>(Op1);
2788  Instruction *LHSI = dyn_cast<Instruction>(Op0);
2789  if (!RHSC || !LHSI)
2790  return nullptr;
2791 
2792  switch (LHSI->getOpcode()) {
2793  case Instruction::GetElementPtr:
2794  // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
2795  if (RHSC->isNullValue() &&
2796  cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
2797  return new ICmpInst(
2798  I.getPredicate(), LHSI->getOperand(0),
2799  Constant::getNullValue(LHSI->getOperand(0)->getType()));
2800  break;
2801  case Instruction::PHI:
2802  // Only fold icmp into the PHI if the phi and icmp are in the same
2803  // block. If in the same block, we're encouraging jump threading. If
2804  // not, we are just pessimizing the code by making an i1 phi.
2805  if (LHSI->getParent() == I.getParent())
2806  if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
2807  return NV;
2808  break;
2809  case Instruction::Select: {
2810  // If either operand of the select is a constant, we can fold the
2811  // comparison into the select arms, which will cause one to be
2812  // constant folded and the select turned into a bitwise or.
2813  Value *Op1 = nullptr, *Op2 = nullptr;
2814  ConstantInt *CI = nullptr;
2815  if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
2816  Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
2817  CI = dyn_cast<ConstantInt>(Op1);
2818  }
2819  if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
2820  Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
2821  CI = dyn_cast<ConstantInt>(Op2);
2822  }
2823 
2824  // We only want to perform this transformation if it will not lead to
2825  // additional code. This is true if either both sides of the select
2826  // fold to a constant (in which case the icmp is replaced with a select
2827  // which will usually simplify) or this is the only user of the
2828  // select (in which case we are trading a select+icmp for a simpler
2829  // select+icmp) or all uses of the select can be replaced based on
2830  // dominance information ("Global cases").
2831  bool Transform = false;
2832  if (Op1 && Op2)
2833  Transform = true;
2834  else if (Op1 || Op2) {
2835  // Local case
2836  if (LHSI->hasOneUse())
2837  Transform = true;
2838  // Global cases
2839  else if (CI && !CI->isZero())
2840  // When Op1 is constant try replacing select with second operand.
2841  // Otherwise Op2 is constant and try replacing select with first
2842  // operand.
2843  Transform =
2844  replacedSelectWithOperand(cast<SelectInst>(LHSI), &I, Op1 ? 2 : 1);
2845  }
2846  if (Transform) {
2847  if (!Op1)
2848  Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC,
2849  I.getName());
2850  if (!Op2)
2851  Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC,
2852  I.getName());
2853  return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
2854  }
2855  break;
2856  }
2857  case Instruction::IntToPtr:
2858  // icmp pred inttoptr(X), null -> icmp pred X, 0
2859  if (RHSC->isNullValue() &&
2860  DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
2861  return new ICmpInst(
2862  I.getPredicate(), LHSI->getOperand(0),
2863  Constant::getNullValue(LHSI->getOperand(0)->getType()));
2864  break;
2865 
2866  case Instruction::Load:
2867  // Try to optimize things like "A[i] > 4" to index computations.
2868  if (GetElementPtrInst *GEP =
2869  dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
2870  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
2871  if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
2872  !cast<LoadInst>(LHSI)->isVolatile())
2873  if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
2874  return Res;
2875  }
2876  break;
2877  }
2878 
2879  return nullptr;
2880 }
2881 
2882 /// Some comparisons can be simplified.
2883 /// In this case, we are looking for comparisons that look like
2884 /// a check for a lossy truncation.
2885 /// Folds:
2886 /// icmp SrcPred (x & Mask), x to icmp DstPred x, Mask
2887 /// Where Mask is some pattern that produces all-ones in low bits:
2888 /// (-1 >> y)
2889 /// ((-1 << y) >> y) <- non-canonical, has extra uses
2890 /// ~(-1 << y)
2891 /// ((1 << y) + (-1)) <- non-canonical, has extra uses
2892 /// The Mask can be a constant, too.
2893 /// For some predicates, the operands are commutative.
2894 /// For others, x can only be on a specific side.
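///
/// For example (editor's illustration, not part of the original source;
/// assumes i8 and the canonical (-1 >> y) mask form):
///   %m = lshr i8 -1, %y
///   %a = and i8 %m, %x
///   %r = icmp eq i8 %a, %x
/// becomes
///   %r = icmp ule i8 %x, %m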
2895 static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
2896  InstCombiner::BuilderTy &Builder) {
2897  ICmpInst::Predicate SrcPred;
2898  Value *X, *M, *Y;
2899  auto m_VariableMask = m_CombineOr(
2900  m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())),
2901  m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())),
2902  m_CombineOr(m_LShr(m_AllOnes(), m_Value()),
2903  m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y))));
2904  auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask());
2905  if (!match(&I, m_c_ICmp(SrcPred,
2906  m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
2907  m_Deferred(X))))
2908  return nullptr;
2909 
2910  ICmpInst::Predicate DstPred;
2911  switch (SrcPred) {
2912  case ICmpInst::Predicate::ICMP_EQ:
2913  // x & (-1 >> y) == x -> x u<= (-1 >> y)
2914  DstPred = ICmpInst::Predicate::ICMP_ULE;
2915  break;
2916  case ICmpInst::Predicate::ICMP_NE:
2917  // x & (-1 >> y) != x -> x u> (-1 >> y)
2918  DstPred = ICmpInst::Predicate::ICMP_UGT;
2919  break;
2920  case ICmpInst::Predicate::ICMP_UGT:
2921  // x u> x & (-1 >> y) -> x u> (-1 >> y)
2922  assert(X == I.getOperand(0) && "instsimplify took care of commut. variant");
2923  DstPred = ICmpInst::Predicate::ICMP_UGT;
2924  break;
2925  case ICmpInst::Predicate::ICMP_UGE:
2926  // x & (-1 >> y) u>= x -> x u<= (-1 >> y)
2927  assert(X == I.getOperand(1) && "instsimplify took care of commut. variant");
2928  DstPred = ICmpInst::Predicate::ICMP_ULE;
2929  break;
2930  case ICmpInst::Predicate::ICMP_ULT:
2931  // x & (-1 >> y) u< x -> x u> (-1 >> y)
2932  assert(X == I.getOperand(1) && "instsimplify took care of commut. variant");
2933  DstPred = ICmpInst::Predicate::ICMP_UGT;
2934  break;
2935  case ICmpInst::Predicate::ICMP_ULE:
2936  // x u<= x & (-1 >> y) -> x u<= (-1 >> y)
2937  assert(X == I.getOperand(0) && "instsimplify took care of commut. variant");
2938  DstPred = ICmpInst::Predicate::ICMP_ULE;
2939  break;
2940  case ICmpInst::Predicate::ICMP_SGT:
2941  // x s> x & (-1 >> y) -> x s> (-1 >> y)
2942  if (X != I.getOperand(0)) // X must be on LHS of comparison!
2943  return nullptr; // Ignore the other case.
2944  DstPred = ICmpInst::Predicate::ICMP_SGT;
2945  break;
2946  case ICmpInst::Predicate::ICMP_SGE:
2947  // x & (-1 >> y) s>= x -> x s<= (-1 >> y)
2948  if (X != I.getOperand(1)) // X must be on RHS of comparison!
2949  return nullptr; // Ignore the other case.
2950  DstPred = ICmpInst::Predicate::ICMP_SLE;
2951  break;
2952  case ICmpInst::Predicate::ICMP_SLT:
2953  // x & (-1 >> y) s< x -> x s> (-1 >> y)
2954  if (X != I.getOperand(1)) // X must be on RHS of comparison!
2955  return nullptr; // Ignore the other case.
2956  DstPred = ICmpInst::Predicate::ICMP_SGT;
2957  break;
2958  case ICmpInst::Predicate::ICMP_SLE:
2959  // x s<= x & (-1 >> y) -> x s<= (-1 >> y)
2960  if (X != I.getOperand(0)) // X must be on LHS of comparison!
2961  return nullptr; // Ignore the other case.
2962  DstPred = ICmpInst::Predicate::ICMP_SLE;
2963  break;
2964  default:
2965  llvm_unreachable("All possible folds are handled.");
2966  }
2967 
2968  return Builder.CreateICmp(DstPred, X, M);
2969 }
2970 
2971 /// Some comparisons can be simplified.
2972 /// In this case, we are looking for comparisons that look like
2973 /// a check for a lossy signed truncation.
2974 /// Folds: (MaskedBits is a constant.)
2975 /// ((%x << MaskedBits) a>> MaskedBits) SrcPred %x
2976 /// Into:
2977 /// (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits)
2978 /// Where KeptBits = bitwidth(%x) - MaskedBits
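///
/// For example (editor's illustration, not part of the original source):
/// with i8 %x and MaskedBits = 4 we have KeptBits = 4, so
///   ((%x << 4) a>> 4) == %x
/// becomes
///   (add %x, 8) u< 16
/// i.e. the signed truncation to 4 bits is lossless iff %x is in [-8, 7].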
2979 static Value *
2980 foldICmpWithTruncSignExtendedVal(ICmpInst &I,
2981  InstCombiner::BuilderTy &Builder) {
2982  ICmpInst::Predicate SrcPred;
2983  Value *X;
2984  const APInt *C0, *C1; // FIXME: non-splats, potentially with undef.
2985  // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use.
2986  if (!match(&I, m_c_ICmp(SrcPred,
2987  m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)),
2988  m_APInt(C1))),
2989  m_Deferred(X))))
2990  return nullptr;
2991 
2992  // Potential handling of non-splats: for each element:
2993  // * if both are undef, replace with constant 0.
2994  // Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
2995  // * if both are not undef and are different, bail out.
2996  // * otherwise, exactly one is undef; pick the non-undef one.
2997 
2998  // The shift amount must be equal.
2999  if (*C0 != *C1)
3000  return nullptr;
3001  const APInt &MaskedBits = *C0;
3002  assert(MaskedBits != 0 && "shift by zero should be folded away already.");
3003 
3004  ICmpInst::Predicate DstPred;
3005  switch (SrcPred) {
3006  case ICmpInst::Predicate::ICMP_EQ:
3007  // ((%x << MaskedBits) a>> MaskedBits) == %x
3008  // =>
3009  // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
3010  DstPred = ICmpInst::Predicate::ICMP_ULT;
3011  break;
3012  case ICmpInst::Predicate::ICMP_NE:
3013  // ((%x << MaskedBits) a>> MaskedBits) != %x
3014  // =>
3015  // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits)
3016  DstPred = ICmpInst::Predicate::ICMP_UGE;
3017  break;
3018  // FIXME: are more folds possible?
3019  default:
3020  return nullptr;
3021  }
3022 
3023  auto *XType = X->getType();
3024  const unsigned XBitWidth = XType->getScalarSizeInBits();
3025  const APInt BitWidth = APInt(XBitWidth, XBitWidth);
3026  assert(BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched");
3027 
3028  // KeptBits = bitwidth(%x) - MaskedBits
3029  const APInt KeptBits = BitWidth - MaskedBits;
3030  assert(KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable");
3031  // ICmpCst = (1 << KeptBits)
3032  const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits);
3033  assert(ICmpCst.isPowerOf2());
3034  // AddCst = (1 << (KeptBits-1))
3035  const APInt AddCst = ICmpCst.lshr(1);
3036  assert(AddCst.ult(ICmpCst) && AddCst.isPowerOf2());
3037 
3038  // T0 = add %x, AddCst
3039  Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst));
3040  // T1 = T0 DstPred ICmpCst
3041  Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst));
3042 
3043  return T1;
3044 }
3045 
3046 /// Try to fold icmp (binop), X or icmp X, (binop).
3047 /// TODO: A large part of this logic is duplicated in InstSimplify's
3048 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
3049 /// duplication.
3050 Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
3051  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3052 
3053  // Special logic for binary operators.
3054  BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
3055  BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
3056  if (!BO0 && !BO1)
3057  return nullptr;
3058 
3059  const CmpInst::Predicate Pred = I.getPredicate();
3060  Value *X;
3061 
3062  // Convert add-with-unsigned-overflow comparisons into a 'not' with compare.
3063  // (Op1 + X) <u Op1 --> ~Op1 <u X
3064  // Op0 >u (Op0 + X) --> X >u ~Op0
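// (Editor's note, illustrative: for unsigned values, Op1 + X wraps, i.e.
// Op1 + X <u Op1, exactly when X >u UMAX - Op1 == ~Op1. For example, on i8
// with Op1 == 200, the sum wraps iff X >u 55, and ~200 == 55.)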
3065  if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) &&
3066  Pred == ICmpInst::ICMP_ULT)
3067  return new ICmpInst(Pred, Builder.CreateNot(Op1), X);
3068  if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) &&
3069  Pred == ICmpInst::ICMP_UGT)
3070  return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
3071 
3072  bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
3073  if (BO0 && isa<OverflowingBinaryOperator>(BO0))
3074  NoOp0WrapProblem =
3075  ICmpInst::isEquality(Pred) ||
3076  (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
3077  (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
3078  if (BO1 && isa<OverflowingBinaryOperator>(BO1))
3079  NoOp1WrapProblem =
3080  ICmpInst::isEquality(Pred) ||
3081  (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
3082  (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
3083 
3084  // Analyze the case when either Op0 or Op1 is an add instruction.
3085  // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
3086  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
3087  if (BO0 && BO0->getOpcode() == Instruction::Add) {
3088  A = BO0->getOperand(0);
3089  B = BO0->getOperand(1);
3090  }
3091  if (BO1 && BO1->getOpcode() == Instruction::Add) {
3092  C = BO1->getOperand(0);
3093  D = BO1->getOperand(1);
3094  }
3095 
3096  // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
3097  if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
3098  return new ICmpInst(Pred, A == Op1 ? B : A,
3099  Constant::getNullValue(Op1->getType()));
3100 
3101  // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
3102  if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
3103  return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
3104  C == Op0 ? D : C);
3105 
3106  // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
3107  if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem &&
3108  NoOp1WrapProblem &&
3109  // Try not to increase register pressure.
3110  BO0->hasOneUse() && BO1->hasOneUse()) {
3111  // Determine Y and Z in the form icmp (X+Y), (X+Z).
3112  Value *Y, *Z;
3113  if (A == C) {
3114  // C + B == C + D -> B == D
3115  Y = B;
3116  Z = D;
3117  } else if (A == D) {
3118  // D + B == C + D -> B == C
3119  Y = B;
3120  Z = C;
3121  } else if (B == C) {
3122  // A + C == C + D -> A == D
3123  Y = A;
3124  Z = D;
3125  } else {
3126  assert(B == D);
3127  // A + D == C + D -> A == C
3128  Y = A;
3129  Z = C;
3130  }
3131  return new ICmpInst(Pred, Y, Z);
3132  }
3133 
3134  // icmp slt (X + -1), Y -> icmp sle X, Y
3135  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
3136  match(B, m_AllOnes()))
3137  return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
3138 
3139  // icmp sge (X + -1), Y -> icmp sgt X, Y
3140  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
3141  match(B, m_AllOnes()))
3142  return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
3143 
3144  // icmp sle (X + 1), Y -> icmp slt X, Y
3145  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
3146  return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
3147 
3148  // icmp sgt (X + 1), Y -> icmp sge X, Y
3149  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
3150  return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
3151 
3152  // icmp sgt X, (Y + -1) -> icmp sge X, Y
3153  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
3154  match(D, m_AllOnes()))
3155  return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
3156 
3157  // icmp sle X, (Y + -1) -> icmp slt X, Y
3158  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
3159  match(D, m_AllOnes()))
3160  return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
3161 
3162  // icmp sge X, (Y + 1) -> icmp sgt X, Y
3163  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
3164  return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
3165 
3166  // icmp slt X, (Y + 1) -> icmp sle X, Y
3167  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
3168  return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
3169 
3170  // TODO: The subtraction-related identities shown below also hold, but
3171  // canonicalization from (X -nuw 1) to (X + -1) means that the combinations
3172  // wouldn't happen even if they were implemented.
3173  //
3174  // icmp ult (X - 1), Y -> icmp ule X, Y
3175  // icmp uge (X - 1), Y -> icmp ugt X, Y
3176  // icmp ugt X, (Y - 1) -> icmp uge X, Y
3177  // icmp ule X, (Y - 1) -> icmp ult X, Y
3178 
3179  // icmp ule (X + 1), Y -> icmp ult X, Y
3180  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
3181  return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);
3182 
3183  // icmp ugt (X + 1), Y -> icmp uge X, Y
3184  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
3185  return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);
3186 
3187  // icmp uge X, (Y + 1) -> icmp ugt X, Y
3188  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
3189  return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);
3190 
3191  // icmp ult X, (Y + 1) -> icmp ule X, Y
3192  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
3193  return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);
3194 
3195  // if C1 has greater magnitude than C2:
3196  // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
3197  // s.t. C3 = C1 - C2
3198  //
3199  // if C2 has greater magnitude than C1:
3200  // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
3201  // s.t. C3 = C2 - C1
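// For example (editor's illustration, not part of the original source):
//   icmp sgt (add nsw i32 %x, 5), (add nsw i32 %y, 3)
// has |C1| >= |C2|, so C3 = 5 - 3 = 2 and it becomes
//   icmp sgt (add nsw i32 %x, 2), %y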
3202  if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
3203  (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
3204  if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
3205  if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
3206  const APInt &AP1 = C1->getValue();
3207  const APInt &AP2 = C2->getValue();
3208  if (AP1.isNegative() == AP2.isNegative()) {
3209  APInt AP1Abs = C1->getValue().abs();
3210  APInt AP2Abs = C2->getValue().abs();
3211  if (AP1Abs.uge(AP2Abs)) {
3212  ConstantInt *C3 = Builder.getInt(AP1 - AP2);
3213  Value *NewAdd = Builder.CreateNSWAdd(A, C3);
3214  return new ICmpInst(Pred, NewAdd, C);
3215  } else {
3216  ConstantInt *C3 = Builder.getInt(AP2 - AP1);
3217  Value *NewAdd = Builder.CreateNSWAdd(C, C3);
3218  return new ICmpInst(Pred, A, NewAdd);
3219  }
3220  }
3221  }
3222 
3223  // Analyze the case when either Op0 or Op1 is a sub instruction.
3224  // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
3225  A = nullptr;
3226  B = nullptr;
3227  C = nullptr;
3228  D = nullptr;
3229  if (BO0 && BO0->getOpcode() == Instruction::Sub) {
3230  A = BO0->getOperand(0);
3231  B = BO0->getOperand(1);
3232  }
3233  if (BO1 && BO1->getOpcode() == Instruction::Sub) {
3234  C = BO1->getOperand(0);
3235  D = BO1->getOperand(1);
3236  }
3237 
3238  // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
3239  if (A == Op1 && NoOp0WrapProblem)
3240  return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
3241  // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
3242  if (C == Op0 && NoOp1WrapProblem)
3243  return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
3244 
3245  // (A - B) >u A --> A <u B
3246  if (A == Op1 && Pred == ICmpInst::ICMP_UGT)
3247  return new ICmpInst(ICmpInst::ICMP_ULT, A, B);
3248  // C <u (C - D) --> C <u D
3249  if (C == Op0 && Pred == ICmpInst::ICMP_ULT)
3250  return new ICmpInst(ICmpInst::ICMP_ULT, C, D);
3251 
3252  // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
3253  if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
3254  // Try not to increase register pressure.
3255  BO0->hasOneUse() && BO1->hasOneUse())
3256  return new ICmpInst(Pred, A, C);
3257  // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
3258  if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
3259  // Try not to increase register pressure.
3260  BO0->hasOneUse() && BO1->hasOneUse())
3261  return new ICmpInst(Pred, D, B);
3262 
3263  // icmp (0-X) s< cst --> icmp X s> -cst
3264  if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
3265  Value *X;
3266  if (match(BO0, m_Neg(m_Value(X))))
3267  if (Constant *RHSC = dyn_cast<Constant>(Op1))
3268  if (RHSC->isNotMinSignedValue())
3269  return new ICmpInst(I.getSwappedPredicate(), X,
3270  ConstantExpr::getNeg(RHSC));
3271  }
3272 
3273  BinaryOperator *SRem = nullptr;
3274  // icmp (srem X, Y), Y
3275  if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1))
3276  SRem = BO0;
3277  // icmp Y, (srem X, Y)
3278  else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
3279  Op0 == BO1->getOperand(1))
3280  SRem = BO1;
3281  if (SRem) {
3282  // We don't check hasOneUse to avoid increasing register pressure because
3283  // the value we use is the same value this instruction was already using.
3284  switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
3285  default:
3286  break;
3287  case ICmpInst::ICMP_EQ:
3288  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3289  case ICmpInst::ICMP_NE:
3290  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3291  case ICmpInst::ICMP_SGT:
3292  case ICmpInst::ICMP_SGE:
3293  return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
3294  Constant::getNullValue(SRem->getType()));
3295  case ICmpInst::ICMP_SLT:
3296  case ICmpInst::ICMP_SLE:
3297  return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
3298  Constant::getNullValue(SRem->getType()));
3299  }
3300  }
3301 
3302  if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() &&
3303  BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) {
3304  switch (BO0->getOpcode()) {
3305  default:
3306  break;
3307  case Instruction::Add:
3308  case Instruction::Sub:
3309  case Instruction::Xor: {
3310  if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
3311  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3312 
3313  const APInt *C;
3314  if (match(BO0->getOperand(1), m_APInt(C))) {
3315  // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
3316  if (C->isSignMask()) {
3317  ICmpInst::Predicate NewPred =
3318  I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
3319  return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
3320  }
3321 
3322  // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
3323  if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) {
3324  ICmpInst::Predicate NewPred =
3325  I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
3326  NewPred = I.getSwappedPredicate(NewPred);
3327  return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
3328  }
3329  }
3330  break;
3331  }
3332  case Instruction::Mul: {
3333  if (!I.isEquality())
3334  break;
3335 
3336  const APInt *C;
3337  if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() &&
3338  !C->isOneValue()) {
3339  // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
3340  // Mask = -1 >> count-trailing-zeros(C).
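// For example (editor's illustration, not part of the original source):
// for i8 and C = 12 (two trailing zeros), Mask = 0x3F, so
//   icmp eq (mul i8 %x, 12), (mul i8 %y, 12)
// becomes
//   icmp eq (and i8 %x, 63), (and i8 %y, 63)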
3341  if (unsigned TZs = C->countTrailingZeros()) {
3342  Constant *Mask = ConstantInt::get(
3343  BO0->getType(),
3344  APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
3345  Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask);
3346  Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask);
3347  return new ICmpInst(Pred, And1, And2);
3348  }
3349  // If there are no trailing zeros in the multiplier, just eliminate
3350  // the multiplies (no masking is needed):
3351  // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y
3352  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3353  }
3354  break;
3355  }
3356  case Instruction::UDiv:
3357  case Instruction::LShr:
3358  if (I.isSigned() || !BO0->isExact() || !BO1->isExact())
3359  break;
3360  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3361 
3362  case Instruction::SDiv:
3363  if (!I.isEquality() || !BO0->isExact() || !BO1->isExact())
3364  break;
3365  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3366 
3367  case Instruction::AShr:
3368  if (!BO0->isExact() || !BO1->isExact())
3369  break;
3370  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3371 
3372  case Instruction::Shl: {
3373  bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
3374  bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
3375  if (!NUW && !NSW)
3376  break;
3377  if (!NSW && I.isSigned())
3378  break;
3379  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3380  }
3381  }
3382  }
3383 
3384  if (BO0) {
3385  // Transform A & (L - 1) `ult` L --> L != 0
3386  auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
3387  auto BitwiseAnd = m_c_And(m_Value(), LSubOne);
3388 
3389  if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) {
3390  auto *Zero = Constant::getNullValue(BO0->getType());
3391  return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
3392  }
3393  }
3394 
3395  if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
3396  return replaceInstUsesWith(I, V);
3397 
3398  if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
3399  return replaceInstUsesWith(I, V);
3400 
3401  return nullptr;
3402 }
3403 
3404 /// Fold icmp Pred min|max(X, Y), X.
3405 static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) {
3406  ICmpInst::Predicate Pred = Cmp.getPredicate();
3407  Value *Op0 = Cmp.getOperand(0);
3408  Value *X = Cmp.getOperand(1);
3409 
3410  // Canonicalize minimum or maximum operand to LHS of the icmp.
3411  if (match(X, m_c_SMin(m_Specific(Op0), m_Value())) ||
3412  match(X, m_c_SMax(m_Specific(Op0), m_Value())) ||
3413  match(X, m_c_UMin(m_Specific(Op0), m_Value())) ||
3414  match(X, m_c_UMax(m_Specific(Op0), m_Value()))) {
3415  std::swap(Op0, X);
3416  Pred = Cmp.getSwappedPredicate();
3417  }
3418 
3419  Value *Y;
3420  if (match(Op0, m_c_SMin(m_Specific(X), m_Value(Y)))) {
3421  // smin(X, Y) == X --> X s<= Y
3422  // smin(X, Y) s>= X --> X s<= Y
3423  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE)
3424  return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
3425 
3426  // smin(X, Y) != X --> X s> Y
3427  // smin(X, Y) s< X --> X s> Y
3428  if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT)
3429  return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
3430 
3431  // These cases should be handled in InstSimplify:
3432  // smin(X, Y) s<= X --> true
3433  // smin(X, Y) s> X --> false
3434  return nullptr;
3435  }
3436 
3437  if (match(Op0, m_c_SMax(m_Specific(X), m_Value(Y)))) {
3438  // smax(X, Y) == X --> X s>= Y
3439  // smax(X, Y) s<= X --> X s>= Y
3440  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLE)
3441  return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
3442 
3443  // smax(X, Y) != X --> X s< Y
3444  // smax(X, Y) s> X --> X s< Y
3445  if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SGT)
3446  return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
3447 
3448  // These cases should be handled in InstSimplify:
3449  // smax(X, Y) s>= X --> true
3450  // smax(X, Y) s< X --> false
3451  return nullptr;
3452  }
3453 
3454  if (match(Op0, m_c_UMin(m_Specific(X), m_Value(Y)))) {
3455  // umin(X, Y) == X --> X u<= Y
3456  // umin(X, Y) u>= X --> X u<= Y
3457  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_UGE)
3458  return new ICmpInst(ICmpInst::ICMP_ULE, X, Y);
3459 
3460  // umin(X, Y) != X --> X u> Y
3461  // umin(X, Y) u< X --> X u> Y
3462  if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT)
3463  return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
3464 
3465  // These cases should be handled in InstSimplify:
3466  // umin(X, Y) u<= X --> true
3467  // umin(X, Y) u> X --> false
3468  return nullptr;
3469  }
3470 
3471  if (match(Op0, m_c_UMax(m_Specific(X), m_Value(Y)))) {
3472  // umax(X, Y) == X --> X u>= Y
3473  // umax(X, Y) u<= X --> X u>= Y
3474  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_ULE)
3475  return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
3476 
3477  // umax(X, Y) != X --> X u< Y
3478  // umax(X, Y) u> X --> X u< Y
3479  if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_UGT)
3480  return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
3481 
3482  // These cases should be handled in InstSimplify:
3483  // umax(X, Y) u>= X --> true
3484  // umax(X, Y) u< X --> false
3485  return nullptr;
3486  }
3487 
3488  return nullptr;
3489 }
3490 
3491 Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
3492  if (!I.isEquality())
3493  return nullptr;
3494 
3495  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3496  const CmpInst::Predicate Pred = I.getPredicate();
3497  Value *A, *B, *C, *D;
3498  if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
3499  if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
3500  Value *OtherVal = A == Op1 ? B : A;
3501  return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
3502  }
3503 
3504  if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
3505  // A^c1 == C^c2 --> A == C^(c1^c2)
3506  ConstantInt *C1, *C2;
3507  if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) &&
3508  Op1->hasOneUse()) {
3509  Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue());
3510  Value *Xor = Builder.CreateXor(C, NC);
3511  return new ICmpInst(Pred, A, Xor);
3512  }
3513 
3514  // A^B == A^D -> B == D
3515  if (A == C)
3516  return new ICmpInst(Pred, B, D);
3517  if (A == D)
3518  return new ICmpInst(Pred, B, C);
3519  if (B == C)
3520  return new ICmpInst(Pred, A, D);
3521  if (B == D)
3522  return new ICmpInst(Pred, A, C);
3523  }
3524  }
3525 
3526  if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
3527  // A == (A^B) -> B == 0
3528  Value *OtherVal = A == Op0 ? B : A;
3529  return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
3530  }
3531 
3532  // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
3533  if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
3534  match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
3535  Value *X = nullptr, *Y = nullptr, *Z = nullptr;
3536 
3537  if (A == C) {
3538  X = B;
3539  Y = D;
3540  Z = A;
3541  } else if (A == D) {
3542  X = B;
3543  Y = C;
3544  Z = A;
3545  } else if (B == C) {
3546  X = A;
3547  Y = D;
3548  Z = B;
3549  } else if (B == D) {
3550  X = A;
3551  Y = C;
3552  Z = B;
3553  }
3554 
3555  if (X) { // Build (X^Y) & Z
3556  Op1 = Builder.CreateXor(X, Y);
3557  Op1 = Builder.CreateAnd(Op1, Z);
3558  I.setOperand(0, Op1);
3559  I.setOperand(1, Constant::getNullValue(Op1->getType()));
3560  return &I;
3561  }
3562  }
3563 
3564  // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
3565  // and (B & (1<<X)-1) == (zext A) --> A == (trunc B)
3566  ConstantInt *Cst1;
3567  if ((Op0->hasOneUse() && match(Op0, m_ZExt(m_Value(A))) &&
3568  match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) ||
3569  (Op1->hasOneUse() && match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) &&
3570  match(Op1, m_ZExt(m_Value(A))))) {
3571  APInt Pow2 = Cst1->getValue() + 1;
3572  if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
3573  Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
3574  return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType()));
3575  }
3576 
3577  // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
3578  // For lshr and ashr pairs.
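// For example (editor's illustration, not part of the original source):
// with C = 3 on i8, the shifted values compare equal iff A and B agree in
// bits 3..7, so
//   icmp eq (lshr i8 %a, 3), (lshr i8 %b, 3)
// becomes
//   icmp ult (xor i8 %a, %b), 8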
3579  if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
3580  match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
3581  (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
3582  match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
3583  unsigned TypeBits = Cst1->getBitWidth();
3584  unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
3585  if (ShAmt < TypeBits && ShAmt != 0) {
3586  ICmpInst::Predicate NewPred =
3587  Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
3588  Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
3589  APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
3590  return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal));
3591  }
3592  }
3593 
3594  // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
3595  if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
3596  match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
3597  unsigned TypeBits = Cst1->getBitWidth();
3598  unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
3599  if (ShAmt < TypeBits && ShAmt != 0) {
3600  Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
3601  APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
3602  Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal),
3603  I.getName() + ".mask");
3604  return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType()));
3605  }
3606  }
3607 
3608  // Transform "icmp eq (trunc (lshr(X, cst1))), cst" to
3609  // "icmp (and X, mask), cst"
3610  uint64_t ShAmt = 0;
3611  if (Op0->hasOneUse() &&
3612  match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) &&
3613  match(Op1, m_ConstantInt(Cst1)) &&
3614  // Only do this when A has multiple uses. This is most important to do
3615  // when it exposes other optimizations.
3616  !A->hasOneUse()) {
3617  unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
3618 
3619  if (ShAmt < ASize) {
3620  APInt MaskV =
3621  APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
3622  MaskV <<= ShAmt;
3623 
3624  APInt CmpV = Cst1->getValue().zext(ASize);
3625  CmpV <<= ShAmt;
3626 
3627  Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV));
3628  return new ICmpInst(Pred, Mask, Builder.getInt(CmpV));
3629  }
3630  }
3631 
3632  // If both operands are byte-swapped or bit-reversed, just compare the
3633  // original values.
3634  // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant()
3635  // and handle more intrinsics.
3636  if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) ||
3637  (match(Op0, m_BitReverse(m_Value(A))) &&
3638  match(Op1, m_BitReverse(m_Value(B)))))
3639  return new ICmpInst(Pred, A, B);
3640 
3641  return nullptr;
3642 }
3643 
3644 /// Handle icmp (cast x to y), (cast/cst). We only handle extending casts so
3645 /// far.
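///
/// For example (editor's illustration, not part of the original source):
/// a signed compare of two sign extensions drops the extensions,
///   icmp slt (sext i8 %a to i32), (sext i8 %b to i32) -> icmp slt i8 %a, %b
/// while a signed compare of two zero extensions becomes unsigned,
///   icmp slt (zext i8 %a to i32), (zext i8 %b to i32) -> icmp ult i8 %a, %b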
3646 Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
3647  const CastInst *LHSCI = cast<CastInst>(ICmp.getOperand(0));
3648  Value *LHSCIOp = LHSCI->getOperand(0);
3649  Type *SrcTy = LHSCIOp->getType();
3650  Type *DestTy = LHSCI->getType();
3651  Value *RHSCIOp;
3652 
3653  // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
3654  // integer type is the same size as the pointer type.
3655  const auto &CompatibleSizes = [&](Type *SrcTy, Type *DestTy) -> bool {
3656  if (isa<VectorType>(SrcTy)) {
3657  SrcTy = cast<VectorType>(SrcTy)->getElementType();
3658  DestTy = cast<VectorType>(DestTy)->getElementType();
3659  }
3660  return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth();
3661  };
3662  if (LHSCI->getOpcode() == Instruction::PtrToInt &&
3663  CompatibleSizes(SrcTy, DestTy)) {
3664  Value *RHSOp = nullptr;
3665  if (auto *RHSC = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) {
3666  Value *RHSCIOp = RHSC->getOperand(0);
3667  if (RHSCIOp->getType()->getPointerAddressSpace() ==
3668  LHSCIOp->getType()->getPointerAddressSpace()) {
3669  RHSOp = RHSC->getOperand(0);
3670  // If the pointer types don't match, insert a bitcast.
3671  if (LHSCIOp->getType() != RHSOp->getType())
3672  RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType());
3673  }
3674  } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
3675  RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
3676  }
3677 
3678  if (RHSOp)
3679  return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSOp);
3680  }
3681 
3682  // The code below only handles extension cast instructions so far.
3683  // Enforce this.
3684  if (LHSCI->getOpcode() != Instruction::ZExt &&
3685  LHSCI->getOpcode() != Instruction::SExt)
3686  return nullptr;
3687 
3688  bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
3689  bool isSignedCmp = ICmp.isSigned();
3690 
3691  if (auto *CI = dyn_cast<CastInst>(ICmp.getOperand(1))) {
3692  // Not an extension from the same type?
3693  RHSCIOp = CI->getOperand(0);
3694  if (RHSCIOp->getType() != LHSCIOp->getType())
3695  return nullptr;
3696 
3697  // If the signedness of the two casts doesn't agree (i.e. one is a sext
3698  // and the other is a zext), then we can't handle this.
3699  if (CI->getOpcode() != LHSCI->getOpcode())
3700  return nullptr;
3701 
3702  // Deal with equality cases early.
3703  if (ICmp.isEquality())
3704  return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
3705 
3706  // A signed comparison of sign extended values simplifies into a
3707  // signed comparison.
3708  if (isSignedCmp && isSignedExt)
3709  return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
3710 
3711  // The other three cases all fold into an unsigned comparison.
3712  return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
3713  }
3714 
3715  // If we aren't dealing with a constant on the RHS, exit early.
3716  auto *C = dyn_cast<Constant>(ICmp.getOperand(1));
3717  if (!C)
3718  return nullptr;
3719 
3720  // Compute the constant that would result if we truncated to SrcTy and
3721  // then re-extended to DestTy.
3722  Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy);
3723  Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);
3724 
3725  // If the re-extended constant didn't change...
3726  if (Res2 == C) {
3727  // Deal with equality cases early.
3728  if (ICmp.isEquality())
3729  return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
3730 
3731  // A signed comparison of sign extended values simplifies into a
3732  // signed comparison.
3733  if (isSignedExt && isSignedCmp)
3734  return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
3735 
3736  // The other three cases all fold into an unsigned comparison.
3737  return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, Res1);
3738  }
3739 
3740  // The re-extended constant changed, partly changed (in the case of a vector),
3741  // or could not be determined to be equal (in the case of a constant
3742  // expression), so the constant cannot be represented in the shorter type.
3743  // Consequently, we cannot emit a simple comparison.
3744  // All the cases that fold to true or false will have already been handled
3745  // by SimplifyICmpInst, so only deal with the tricky case.
3746 
3747  if (isSignedCmp || !isSignedExt || !isa<ConstantInt>(C))
3748  return nullptr;
3749 
3750  // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
3751  // should have been folded away previously and do not enter here.
3752 
3753  // We're performing an unsigned comp with a sign extended value.
3754  // This is true if the input is >= 0. [aka >s -1]
3755  Constant *NegOne = Constant::getAllOnesValue(SrcTy);
3756  Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName());
3757 
3758  // Finally, return the value computed.
3759  if (ICmp.getPredicate() == ICmpInst::ICMP_ULT)
3760  return replaceInstUsesWith(ICmp, Result);
3761 
3762  assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
3763  return BinaryOperator::CreateNot(Result);
3764 }
3765 
3766 bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
3767  Value *RHS, Instruction &OrigI,
3768  Value *&Result, Constant *&Overflow) {
3769  if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
3770  std::swap(LHS, RHS);
3771 
3772  auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
3773  Result = OpResult;
3774  Overflow = OverflowVal;
3775  if (ReuseName)
3776  Result->takeName(&OrigI);
3777  return true;
3778  };
3779 
3780  // If the overflow check was an add followed by a compare, the insertion point
3781  // may be pointing to the compare. We want to insert the new instructions
3782  // before the add in case there are uses of the add between the add and the
3783  // compare.
3784  Builder.SetInsertPoint(&OrigI);
3785 
3786  switch (OCF) {
3787  case OCF_INVALID:
3788  llvm_unreachable("bad overflow check kind!");
3789 
3790  case OCF_UNSIGNED_ADD: {
3791  OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
3792  if (OR == OverflowResult::NeverOverflows)
3793  return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(),
3794  true);
3795 
3796  if (OR == OverflowResult::AlwaysOverflows)
3797  return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true);
3798 
3799  // Fall through uadd into sadd
3800  LLVM_FALLTHROUGH;
3801  }
3802  case OCF_SIGNED_ADD: {
3803  // X + 0 -> {X, false}
3804  if (match(RHS, m_Zero()))
3805  return SetResult(LHS, Builder.getFalse(), false);
3806 
3807  // We can strength reduce this signed add into a regular add if we can prove
3808  // that it will never overflow.
3809  if (OCF == OCF_SIGNED_ADD)
3810  if (willNotOverflowSignedAdd(LHS, RHS, OrigI))
3811  return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(),
3812  true);
3813  break;
3814  }
3815 
3816  case OCF_UNSIGNED_SUB:
3817  case OCF_SIGNED_SUB: {
3818  // X - 0 -> {X, false}
3819  if (match(RHS, m_Zero()))
3820  return SetResult(LHS, Builder.getFalse(), false);
3821 
3822  if (OCF == OCF_SIGNED_SUB) {
3823  if (willNotOverflowSignedSub(LHS, RHS, OrigI))
3824  return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(),
3825  true);
3826  } else {
3827  if (willNotOverflowUnsignedSub(LHS, RHS, OrigI))
3828  return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(),
3829  true);
3830  }
3831  break;
3832  }
3833 
3834  case OCF_UNSIGNED_MUL: {
3835  OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
3836  if (OR == OverflowResult::NeverOverflows)
3837  return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(),
3838  true);
3839  if (OR == OverflowResult::AlwaysOverflows)
3840  return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true);
3841  break;
3842  }
3843  case OCF_SIGNED_MUL:
3844  // X * undef -> undef
3845  if (isa<UndefValue>(RHS))
3846  return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false);
3847 
3848  // X * 0 -> {0, false}
3849  if (match(RHS, m_Zero()))
3850  return SetResult(RHS, Builder.getFalse(), false);
3851 
3852  // X * 1 -> {X, false}
3853  if (match(RHS, m_One()))
3854  return SetResult(LHS, Builder.getFalse(), false);
3855 
3856  if (OCF == OCF_SIGNED_MUL)
3857  if (willNotOverflowSignedMul(LHS, RHS, OrigI))
3858  return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(),
3859  true);
3860  break;
3861  }
3862 
3863  return false;
3864 }
3865 
3866 /// Recognize and process an idiom involving a test for multiplication
3867 /// overflow.
3868 ///
3869 /// The caller has matched a pattern of the form:
3870 /// I = cmp u (mul(zext A, zext B), V)
3871 /// The function checks if this is a test for overflow and, if so, replaces
3872 /// the multiplication with a call to the 'mul.with.overflow' intrinsic.
3873 ///
3874 /// \param I Compare instruction.
3875 /// \param MulVal Result of the 'mul' instruction. It is one of the
3876 /// arguments of the compare instruction. Must be of integer type.
3877 /// \param OtherVal The other argument of compare instruction.
3878 /// \returns Instruction which must replace the compare instruction, or NULL
3879 /// if no replacement is required.
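///
/// For example (editor's illustration, not part of the original source):
///   %ax = zext i32 %a to i64
///   %bx = zext i32 %b to i64
///   %m  = mul i64 %ax, %bx
///   %ov = icmp ugt i64 %m, 4294967295
/// becomes
///   %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
///   %ov  = extractvalue { i32, i1 } %res, 1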
3880 static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
3881  Value *OtherVal, InstCombiner &IC) {
3882  // Don't bother doing this transformation for pointers, and don't do it
3883  // for vectors.
3884  if (!isa<IntegerType>(MulVal->getType()))
3885  return nullptr;
3886 
3887  assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
3888  assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
3889  auto *MulInstr = dyn_cast<Instruction>(MulVal);
3890  if (!MulInstr)
3891  return nullptr;
3892  assert(MulInstr->getOpcode() == Instruction::Mul);
3893 
3894  auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)),
3895  *RHS = cast<ZExtOperator>(MulInstr->getOperand(1));
3896  assert(LHS->getOpcode() == Instruction::ZExt);
3897  assert(RHS->getOpcode() == Instruction::ZExt);
3898  Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
3899 
3900  // Calculate type and width of the result produced by mul.with.overflow.
3901  Type *TyA = A->getType(), *TyB = B->getType();
3902  unsigned WidthA = TyA->getPrimitiveSizeInBits(),
3903  WidthB = TyB->getPrimitiveSizeInBits();
3904  unsigned MulWidth;
3905  Type *MulType;
3906  if (WidthB > WidthA) {
3907  MulWidth = WidthB;
3908  MulType = TyB;
3909  } else {
3910  MulWidth = WidthA;
3911  MulType = TyA;
3912  }
3913 
3914  // In order to replace the original mul with a narrower mul.with.overflow,
3915  // all uses must ignore upper bits of the product. The number of used low
3916  // bits must not be greater than the width of mul.with.overflow.
3917  if (MulVal->hasNUsesOrMore(2))
3918  for (User *U : MulVal->users()) {
3919  if (U == &I)
3920  continue;
3921  if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
3922  // Check if truncation ignores bits above MulWidth.
3923  unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
3924  if (TruncWidth > MulWidth)
3925  return nullptr;
3926  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
3927  // Check if AND ignores bits above MulWidth.
3928  if (BO->getOpcode() != Instruction::And)
3929  return nullptr;
3930  if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
3931  const APInt &CVal = CI->getValue();
3932  if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth)
3933  return nullptr;
3934  } else {
3935  // In this case we could have the operand of the binary operation
3936  // being defined in another block, and performing the replacement
3937  // could break the dominance relation.
3938  return nullptr;
3939  }
3940  } else {
3941  // Other uses prohibit this transformation.
3942  return nullptr;
3943  }
3944  }
3945 
3946  // Recognize patterns
3947  switch (I.getPredicate()) {
3948  case ICmpInst::ICMP_EQ:
3949  case ICmpInst::ICMP_NE:
3950  // Recognize pattern:
3951  // mulval = mul(zext A, zext B)
3952  // cmp eq/neq mulval, zext trunc mulval
3953  if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal))
3954  if (Zext->hasOneUse()) {
3955  Value *ZextArg = Zext->getOperand(0);
3956  if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg))
3957  if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth)
3958  break; // Recognized
3959  }
3960 
3961  // Recognize pattern:
3962  // mulval = mul(zext A, zext B)
3963  // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits.
3964  ConstantInt *CI;
3965  Value *ValToMask;
3966  if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) {
3967  if (ValToMask != MulVal)
3968  return nullptr;
3969  const APInt &CVal = CI->getValue() + 1;
3970  if (CVal.isPowerOf2()) {
3971  unsigned MaskWidth = CVal.logBase2();
3972  if (MaskWidth == MulWidth)
3973  break; // Recognized
3974  }
3975  }
3976  return nullptr;
3977 
3978  case ICmpInst::ICMP_UGT:
3979  // Recognize pattern:
3980  // mulval = mul(zext A, zext B)
3981  // cmp ugt mulval, max
3982  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
3983  APInt MaxVal = APInt::getMaxValue(MulWidth);
3984  MaxVal = MaxVal.zext(CI->getBitWidth());
3985  if (MaxVal.eq(CI->getValue()))
3986  break; // Recognized
3987  }
3988  return nullptr;
3989 
3990  case ICmpInst::ICMP_UGE:
3991  // Recognize pattern:
3992  // mulval = mul(zext A, zext B)
3993  // cmp uge mulval, max+1
3994  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
3995  APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
3996  if (MaxVal.eq(CI->getValue()))
3997  break; // Recognized
3998  }
3999  return nullptr;
4000 
4001  case ICmpInst::ICMP_ULE:
4002  // Recognize pattern:
4003  // mulval = mul(zext A, zext B)
4004  // cmp ule mulval, max
4005  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
4006  APInt MaxVal = APInt::getMaxValue(MulWidth);
4007  MaxVal = MaxVal.zext(CI->getBitWidth());
4008  if (MaxVal.eq(CI->getValue()))
4009  break; // Recognized
4010  }
4011  return nullptr;
4012 
4013  case ICmpInst::ICMP_ULT:
4014  // Recognize pattern:
4015  // mulval = mul(zext A, zext B)
4016  // cmp ult mulval, max + 1
4017  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
4018  APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
4019  if (MaxVal.eq(CI->getValue()))
4020  break; // Recognized
4021  }
4022  return nullptr;
4023 
4024  default:
4025  return nullptr;
4026  }
4027 
4028  InstCombiner::BuilderTy &Builder = IC.Builder;
4029  Builder.SetInsertPoint(MulInstr);
4030 
4031  // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
4032  Value *MulA = A, *MulB = B;
4033  if (WidthA < MulWidth)
4034  MulA = Builder.CreateZExt(A, MulType);
4035  if (WidthB < MulWidth)
4036  MulB = Builder.CreateZExt(B, MulType);
4037  Value *F = Intrinsic::getDeclaration(I.getModule(),
4038  Intrinsic::umul_with_overflow, MulType);
4039  CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
4040  IC.Worklist.Add(MulInstr);
4041 
4042  // If there are uses of the mul result other than the comparison, we know
4043  // that they are truncation or binary AND. Change them to use the result of
4044  // mul.with.overflow and properly adjust the mask/size.
4045  if (MulVal->hasNUsesOrMore(2)) {
4046  Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
4047  for (auto UI = MulVal->user_begin(), UE = MulVal->user_end(); UI != UE;) {
4048  User *U = *UI++;
4049  if (U == &I || U == OtherVal)
4050  continue;
4051  if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
4052  if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
4053  IC.replaceInstUsesWith(*TI, Mul);
4054  else
4055  TI->setOperand(0, Mul);
4056  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
4057  assert(BO->getOpcode() == Instruction::And);
4058  // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
4059  ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
4060  APInt ShortMask = CI->getValue().trunc(MulWidth);
4061  Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
4062  Instruction *Zext =
4063  cast<Instruction>(Builder.CreateZExt(ShortAnd, BO->getType()));
4064  IC.Worklist.Add(Zext);
4065  IC.replaceInstUsesWith(*BO, Zext);
4066  } else {
4067  llvm_unreachable("Unexpected Binary operation");
4068  }
4069  IC.Worklist.Add(cast<Instruction>(U));
4070  }
4071  }
4072  if (isa<Instruction>(OtherVal))
4073  IC.Worklist.Add(cast<Instruction>(OtherVal));
4074 
4075  // The original icmp gets replaced with the overflow value, maybe inverted
4076  // depending on predicate.
4077  bool Inverse = false;
4078  switch (I.getPredicate()) {
4079  case ICmpInst::ICMP_NE:
4080  break;
4081  case ICmpInst::ICMP_EQ:
4082  Inverse = true;
4083  break;
4084  case ICmpInst::ICMP_UGT:
4085  case ICmpInst::ICMP_UGE:
4086  if (I.getOperand(0) == MulVal)
4087  break;
4088  Inverse = true;
4089  break;
4090  case ICmpInst::ICMP_ULT:
4091  case ICmpInst::ICMP_ULE:
4092  if (I.getOperand(1) == MulVal)
4093  break;
4094  Inverse = true;
4095  break;
4096  default:
4097  llvm_unreachable("Unexpected predicate");
4098  }
4099  if (Inverse) {
4100  Value *Res = Builder.CreateExtractValue(Call, 1);
4101  return BinaryOperator::CreateNot(Res);
4102  }
4103 
4104  return ExtractValueInst::Create(Call, 1);
4105 }
4106 
4107 /// When performing a comparison against a constant, it is possible that not all
4108 /// the bits in the LHS are demanded. This helper method computes the mask that
4109 /// IS demanded.
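///
/// For example (editor's illustration, not part of the original source):
/// for "icmp ugt i8 %x, 7" the three trailing ones of the RHS are not
/// demanded, so the returned mask covers only bits 3..7 (0xF8).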
4110 static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
4111  const APInt *RHS;
4112  if (!match(I.getOperand(1), m_APInt(RHS)))
4113  return APInt::getAllOnesValue(BitWidth);
4114 
4115  // If this is a normal comparison, it demands all bits. If it is a sign bit
4116  // comparison, it only demands the sign bit.
4117  bool UnusedBit;
4118  if (isSignBitCheck(I.getPredicate(), *RHS, UnusedBit))
4119  return APInt::getSignMask(BitWidth);
4120 
4121  switch (I.getPredicate()) {
4122  // For a UGT comparison, we don't care about any bits that
4123  // correspond to the trailing ones of the comparand. The value of these
4124  // bits doesn't impact the outcome of the comparison, because any value
4125  // greater than the RHS must differ in a bit higher than these due to carry.
4126  case ICmpInst::ICMP_UGT:
4127  return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingOnes());
4128 
4129  // Similarly, for a ULT comparison, we don't care about the trailing zeros.
4130  // Any value less than the RHS must differ in a higher bit because of carries.
4131  case ICmpInst::ICMP_ULT:
4132  return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingZeros());
4133 
4134  default:
4135  return APInt::getAllOnesValue(BitWidth);
4136  }
4137 }
4138 
4139 /// Check if the order of \p Op0 and \p Op1 as operands in an ICmpInst
4140 /// should be swapped.
4141 /// The decision is based on how many times these two operands are reused
4142 /// as subtract operands and their positions in those instructions.
4143 /// The rationale is that several architectures use the same instruction for
4144 /// both subtract and cmp. Thus, it is better if the order of those operands
4145 /// matches.
4146 /// \return true if Op0 and Op1 should be swapped.
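///
/// For example (editor's illustration, not part of the original source):
/// if "%d = sub i32 %b, %a" already exists, rewriting "icmp ult i32 %a, %b"
/// as "icmp ugt i32 %b, %a" gives the compare the same operand order as the
/// subtract, so targets with a combined sub/cmp instruction can reuse it.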
4147 static bool swapMayExposeCSEOpportunities(const Value *Op0, const Value *Op1) {
4148  // Filter out pointer values as those cannot appear directly in subtract.
4149  // FIXME: we may want to go through inttoptrs or bitcasts.
4150  if (Op0->getType()->isPointerTy())
4151  return false;
4152  // If a subtract already has the same operands as a compare, swapping would be
4153  // bad. If a subtract has the same operands as a compare but in reverse order,
4154  // then swapping is good.
4155  int GoodToSwap = 0;
4156  for (const User *U : Op0->users()) {
4157  if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
4158  GoodToSwap++;
4159  else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
4160  GoodToSwap--;
4161  }
4162  return GoodToSwap > 0;
4163 }
4164 
4165 /// Check that one use is in the same block as the definition and all
4166 /// other uses are in blocks dominated by a given block.
4167 ///
4168 /// \param DI Definition
4169 /// \param UI Use
4170 /// \param DB Block that must dominate all uses of \p DI outside
4171 /// the parent block
4172 /// \return true when \p UI is the only use of \p DI in the parent block
4173 /// and all other uses of \p DI are in blocks dominated by \p DB.
4174 ///
4175 bool InstCombiner::dominatesAllUses(const Instruction *DI,
4176  const Instruction *UI,
4177  const BasicBlock *DB) const {
4178  assert(DI && UI && "Instruction not defined\n");
4179  // Ignore incomplete definitions.
4180  if (!DI->getParent())
4181  return false;
4182  // DI and UI must be in the same block.
4183  if (DI->getParent() != UI->getParent())
4184  return false;
4185  // Protect from self-referencing blocks.
4186  if (DI->getParent() == DB)
4187  return false;
4188  for (const User *U : DI->users()) {
4189  auto *Usr = cast<Instruction>(U);
4190  if (Usr != UI && !DT.dominates(DB, Usr->getParent()))
4191  return false;
4192  }
4193  return true;
4194 }
4195 
4196 /// Return true when the instruction sequence within a block is select-cmp-br.
4197 static bool isChainSelectCmpBranch(const SelectInst *SI) {
4198  const BasicBlock *BB = SI->getParent();
4199  if (!BB)
4200  return false;
4201  auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
4202  if (!BI || BI->getNumSuccessors() != 2)
4203  return false;
4204  auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
4205  if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
4206  return false;
4207  return true;
4208 }
4209 
4210 /// True when a select result is replaced by one of its operands in a
4211 /// select-icmp sequence. This will eventually result in the elimination
4212 /// of the select.
4213 ///
4214 /// \param SI Select instruction
4215 /// \param Icmp Compare instruction
4216 /// \param SIOpd Operand that replaces the select
4217 ///
4218 /// Notes:
4219 /// - The replacement is global and requires dominator information
4220 /// - The caller is responsible for the actual replacement
4221 ///
4222 /// Example:
4223 ///
4224 /// entry:
4225 /// %4 = select i1 %3, %C* %0, %C* null
4226 /// %5 = icmp eq %C* %4, null
4227 /// br i1 %5, label %9, label %7
4228 /// ...
4229 /// ; <label>:7 ; preds = %entry
4230 /// %8 = getelementptr inbounds %C* %4, i64 0, i32 0
4231 /// ...
4232 ///
4233 /// can be transformed to
4234 ///
4235 /// %5 = icmp eq %C* %0, null
4236 /// %6 = select i1 %3, i1 %5, i1 true
4237 /// br i1 %6, label %9, label %7
4238 /// ...
4239 /// ; <label>:7 ; preds = %entry
4240 /// %8 = getelementptr inbounds %C* %0, i64 0, i32 0 // replace by %0!
4241 ///
4242 /// Similar when the first operand of the select is a constant or/and
4243 /// the compare is for not equal rather than equal.
4244 ///
4245 /// NOTE: The function is only called when the select and compare constants
4246 /// are equal, the optimization can work only for EQ predicates. This is not a
4247 /// major restriction since a NE compare should be 'normalized' to an equal
4248 /// compare, which usually happens in the combiner and test case
4249 /// select-cmp-br.ll checks for it.
4250 bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
4251  const ICmpInst *Icmp,
4252  const unsigned SIOpd) {
4253  assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!");
4254  if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
4255  BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
4256  // The check for the single predecessor is not the best that can be
4257  // done. But it protects efficiently against cases like when SI's
4258  // home block has two successors, Succ and Succ1, and Succ1 is a
4259  // predecessor of Succ. Then SI can't be replaced by SIOpd because the use
4260  // that gets replaced can be reached on either path. So the uniqueness
4261  // check guarantees that the path containing all uses of SI (outside SI's
4262  // parent) is disjoint from all other paths out of SI. But that
4263  // information is more expensive to compute, and the trade-off here is in
4264  // favor of compile time. Note also that we check for a single
4265  // predecessor and not just uniqueness, to handle the situation when
4266  // Succ and Succ1 point to the same basic block.
4267  if (Succ->getSinglePredecessor() && dominatesAllUses(SI, Icmp, Succ)) {
4268  NumSel++;
4269  SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent());
4270  return true;
4271  }
4272  }
4273  return false;
4274 }
4275 
4276 /// Try to fold the comparison based on range information we can get by checking
4277 /// whether bits are known to be zero or one in the inputs.
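///
/// For example (editor's illustration, not part of the original source):
/// if the two low bits of %x are known zero, "icmp ult i8 %x, 3" can only
/// be true for %x == 0, so it folds to "icmp eq i8 %x, 0".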
4278 Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
4279  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
4280  Type *Ty = Op0->getType();
4281  ICmpInst::Predicate Pred = I.getPredicate();
4282 
4283  // Get scalar or pointer size.
4284  unsigned BitWidth = Ty->isIntOrIntVectorTy()
4285  ? Ty->getScalarSizeInBits()
4286  : DL.getIndexTypeSizeInBits(Ty->getScalarType());
4287 
4288  if (!BitWidth)
4289  return nullptr;
4290 
4291  KnownBits Op0Known(BitWidth);
4292  KnownBits Op1Known(BitWidth);
4293 
4294  if (SimplifyDemandedBits(&I, 0,
4295  getDemandedBitsLHSMask(I, BitWidth),
4296  Op0Known, 0))
4297  return &I;
4298 
4299  if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth),
4300  Op1Known, 0))
4301  return &I;
4302 
4303  // Given the known and unknown bits, compute a range that the LHS could be
4304  // in. Compute the Min, Max and RHS values based on the known bits. For the
4305  // EQ and NE we use unsigned values.
4306  APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
4307  APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
4308  if (I.isSigned()) {
4309  computeSignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
4310  computeSignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
4311  } else {
4312  computeUnsignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
4313  computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
4314  }
4315 
4316  // If Min and Max are known to be the same, then SimplifyDemandedBits figured
4317  // out that the LHS or RHS is a constant. Constant fold this now, so that
4318  // code below can assume that Min != Max.
4319  if (!isa<Constant>(Op0) && Op0Min == Op0Max)
4320  return new ICmpInst(Pred, ConstantExpr::getIntegerValue(Ty, Op0Min), Op1);
4321  if (!isa<Constant>(Op1) && Op1Min == Op1Max)
4322  return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min));
4323 
4324  // Based on the range information we know about the LHS, see if we can
4325  // simplify this comparison. For example, (x&4) < 8 is always true.
4326  switch (Pred) {
4327  default:
4328  llvm_unreachable("Unknown icmp opcode!");
4329  case ICmpInst::ICMP_EQ:
4330  case ICmpInst::ICMP_NE: {
4331  if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) {
4332  return Pred == CmpInst::ICMP_EQ
4333  ? replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()))
4334  : replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4335  }
4336 
4337  // If all bits are known zero except for one, then we know at most one bit
4338  // is set. If the comparison is against zero, then this is a check to see if
4339  // *that* bit is set.
4340  APInt Op0KnownZeroInverted = ~Op0Known.Zero;
4341  if (Op1Known.isZero()) {
4342  // If the LHS is an AND with the same constant, look through it.
4343  Value *LHS = nullptr;
4344  const APInt *LHSC;
4345  if (!match(Op0, m_And(m_Value(LHS), m_APInt(LHSC))) ||
4346  *LHSC != Op0KnownZeroInverted)
4347  LHS = Op0;
4348 
4349  Value *X;
4350  if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
4351  APInt ValToCheck = Op0KnownZeroInverted;
4352  Type *XTy = X->getType();
4353  if (ValToCheck.isPowerOf2()) {
4354  // ((1 << X) & 8) == 0 -> X != 3
4355  // ((1 << X) & 8) != 0 -> X == 3
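  // In IR terms, the first fold above is (an illustrative sketch):
  //   %shl = shl i32 1, %X
  //   %and = and i32 %shl, 8
  //   %cmp = icmp eq i32 %and, 0   -->   %cmp = icmp ne i32 %X, 3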
4356  auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
4357  auto NewPred = ICmpInst::getInversePredicate(Pred);
4358  return new ICmpInst(NewPred, X, CmpC);
4359  } else if ((++ValToCheck).isPowerOf2()) {
4360  // ((1 << X) & 7) == 0 -> X >= 3
4361  // ((1 << X) & 7) != 0 -> X < 3
4362  auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
4363  auto NewPred =
4364      Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT;
4365  return new ICmpInst(NewPred, X, CmpC);
4366  }
4367  }
4368 
4369  // Check if the LHS is (power-of-2 >>u X), e.g. 8 >>u X, and only bit 0
4369  // of the masked result can be nonzero.
4370  const APInt *CI;
4371  if (Op0KnownZeroInverted.isOneValue() &&
4372  match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) {
4373  // ((8 >>u X) & 1) == 0 -> X != 3
4374  // ((8 >>u X) & 1) != 0 -> X == 3
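  // Illustrative IR for this fold (assuming i32 values):
  //   %shr = lshr i32 8, %X
  //   %and = and i32 %shr, 1
  //   %cmp = icmp eq i32 %and, 0   -->   %cmp = icmp ne i32 %X, 3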
4375  unsigned CmpVal = CI->countTrailingZeros();
4376  auto NewPred = ICmpInst::getInversePredicate(Pred);
4377  return new ICmpInst(NewPred, X, ConstantInt::get(X->getType(), CmpVal));
4378  }
4379  }
4380  break;
4381  }
4382  case ICmpInst::ICMP_ULT: {
4383  if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
4384  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4385  if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
4386  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4387  if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
4388  return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4389 
4390  const APInt *CmpC;
4391  if (match(Op1, m_APInt(CmpC))) {
4392  // A <u C -> A == C-1 if min(A)+1 == C
4393  if (*CmpC == Op0Min + 1)
4394  return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4395  ConstantInt::get(Op1->getType(), *CmpC - 1));
4396  // X <u C --> X == 0, if the number of zero bits in the bottom of X
4397  // exceeds the log2 of C.
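  // E.g., supposing the low three bits of %x are known zero (hypothetical):
  //   %c = icmp ult i32 %x, 6   -->   %c = icmp eq i32 %x, 0
  // because any nonzero multiple of 8 is already greater than 6.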
4398  if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
4399  return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4400  Constant::getNullValue(Op1->getType()));
4401  }
4402  break;
4403  }
4404  case ICmpInst::ICMP_UGT: {
4405  if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
4406  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4407  if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
4408  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4409  if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
4410  return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4411 
4412  const APInt *CmpC;
4413  if (match(Op1, m_APInt(CmpC))) {
4414  // A >u C -> A == C+1 if max(A)-1 == C
4415  if (*CmpC == Op0Max - 1)
4416  return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4417  ConstantInt::get(Op1->getType(), *CmpC + 1));
4418  // X >u C --> X != 0, if the number of zero bits in the bottom of X
4419  // exceeds the log2 of C.
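  // E.g., supposing the low three bits of %x are known zero (hypothetical):
  //   %c = icmp ugt i32 %x, 7   -->   %c = icmp ne i32 %x, 0
  // because any nonzero multiple of 8 is greater than 7.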
4420  if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
4421  return new ICmpInst(ICmpInst::ICMP_NE, Op0,
4422  Constant::getNullValue(Op1->getType()));
4423  }
4424  break;
4425  }
4426  case ICmpInst::ICMP_SLT: {
4427  if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B)
4428  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4429  if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
4430  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4431  if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
4432  return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4433  const APInt *CmpC;
4434  if (match(Op1, m_APInt(CmpC))) {
4435  if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
4436  return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4437  ConstantInt::get(Op1->getType(), *CmpC - 1));
4438  }
4439  break;
4440  }
4441  case ICmpInst::ICMP_SGT: {
4442  if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
4443  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4444  if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
4445  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4446  if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
4447  return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4448  const APInt *CmpC;
4449  if (match(Op1, m_APInt(CmpC))) {
4450  if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
4451  return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4452  ConstantInt::get(Op1->getType(), *CmpC + 1));
4453  }
4454  break;
4455  }
4456  case ICmpInst::ICMP_SGE:
4457  assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
4458  if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
4459  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4460  if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
4461  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4462  if (Op1Min == Op0Max) // A >=s B -> A == B if max(A) == min(B)
4463  return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4464  break;
4465  case ICmpInst::ICMP_SLE:
4466  assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
4467  if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
4468  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4469  if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
4470  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4471  if (Op1Max == Op0Min) // A <=s B -> A == B if min(A) == max(B)
4472  return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4473  break;
4474  case ICmpInst::ICMP_UGE:
4475  assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
4476  if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
4477  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4478  if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
4479  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4480  if (Op1Min == Op0Max) // A >=u B -> A == B if max(A) == min(B)
4481  return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4482  break;
4483  case ICmpInst::ICMP_ULE:
4484  assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
4485  if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
4486  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4487  if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
4488  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4489  if (Op1Max == Op0Min) // A <=u B -> A == B if min(A) == max(B)
4490  return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4491  break;
4492  }
4493 
4494  // Turn a signed comparison into an unsigned one if both operands are known to
4495  // have the same sign.
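  // E.g., if the sign bit of both %a and %b is known zero (hypothetical):
  //   %c = icmp slt i32 %a, %b   -->   %c = icmp ult i32 %a, %b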
4496  if (I.isSigned() &&
4497  ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) ||
4498  (Op0Known.One.isNegative() && Op1Known.One.isNegative())))
4499  return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
4500 
4501  return nullptr;
4502 }
4503 
4504 /// If we have an icmp le or icmp ge instruction with a constant operand, turn
4505 /// it into the appropriate icmp lt or icmp gt instruction. This transform
4506 /// allows them to be folded in visitICmpInst.
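/// For example (illustrative):
///   icmp sle i32 %x, 9   -->   icmp slt i32 %x, 10
///   icmp uge i32 %x, 4   -->   icmp ugt i32 %x, 3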
4507 ICmpInst *InstCombiner::canonicalizeCmpWithConstant(ICmpInst &I) {
4508  ICmpInst::Predicate Pred = I.getPredicate();
4509  if (Pred != ICmpInst::ICMP_SLE && Pred != ICmpInst::ICMP_SGE &&
4510  Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_UGE)
4511  return nullptr;
4512 
4513  Value *Op0 = I.getOperand(0);
4514  Value *Op1 = I.getOperand(1);
4515  auto *Op1C = dyn_cast<Constant>(Op1);
4516  if (!Op1C)
4517  return nullptr;
4518 
4519  // Check if the constant operand can be safely incremented/decremented without
4520  // overflowing/underflowing. For scalars, SimplifyICmpInst has already handled
4521  // the edge cases for us, so we just assert on them. For vectors, we must
4522  // handle the edge cases.
4523  Type *Op1Type = Op1->getType();
4524  bool IsSigned = I.isSigned();
4525  bool IsLE = (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE);
4526  auto *CI = dyn_cast<ConstantInt>(Op1C);
4527  if (CI) {
4528  // A <= MAX -> TRUE ; A >= MIN -> TRUE
4529  assert(IsLE ? !CI->isMaxValue(IsSigned) : !CI->isMinValue(IsSigned));
4530  } else if (Op1Type->isVectorTy()) {
4531  // TODO? If the edge cases for vectors were guaranteed to be handled as they
4532  // are for scalars, we could remove the min/max checks. However, to do that,
4533  // we would have to use insertelement/shufflevector to replace edge values.
4534  unsigned NumElts = Op1Type->getVectorNumElements();
4535  for (unsigned i = 0; i != NumElts; ++i) {
4536  Constant *Elt = Op1C->getAggregateElement(i);
4537  if (!Elt)
4538  return nullptr;
4539 
4540  if (isa<UndefValue>(Elt))
4541  continue;
4542 
4543  // Bail out if we can't determine whether this constant is min/max, or if
4544  // we know that this constant is min/max.
4545  auto *CI = dyn_cast<ConstantInt>(Elt);
4546  if (!CI || (IsLE ? CI->isMaxValue(IsSigned) : CI->isMinValue(IsSigned)))
4547  return nullptr;
4548  }
4549  } else {
4550  // ConstantExpr?
4551  return nullptr;
4552  }
4553 
4554  // Increment or decrement the constant and set the new comparison predicate:
4555  // ULE -> ULT ; UGE -> UGT ; SLE -> SLT ; SGE -> SGT
4556  Constant *OneOrNegOne = ConstantInt::get(Op1Type, IsLE ? 1 : -1, true);
4557  CmpInst::Predicate NewPred = IsLE ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGT;
4558  NewPred = IsSigned ? ICmpInst::getSignedPredicate(NewPred) : NewPred;
4559  return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne));
4560 }
4561 
4562 /// Integer compare with boolean values can always be turned into bitwise ops.
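/// For example, 'icmp ult i1 %a, %b' becomes '~%a & %b' (built as
/// 'and i1 (xor %a, true), %b'), per the ICMP_ULT case below.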
4563 static Instruction *canonicalizeICmpBool(ICmpInst &I,
4564  InstCombiner::BuilderTy &Builder) {
4565  Value *A = I.getOperand(0), *B = I.getOperand(1);
4566  assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only");
4567 
4568  // A boolean compared to true/false can be simplified to Op0/true/false in
4569  // 14 out of the 20 (10 predicates * 2 constants) possible combinations.
4570  // Cases not handled by InstSimplify are always 'not' of Op0.
4571  if (match(B, m_Zero())) {
4572  switch (I.getPredicate()) {
4573  case CmpInst::ICMP_EQ: // A == 0 -> !A
4574  case CmpInst::ICMP_ULE: // A <=u 0 -> !A
4575  case CmpInst::ICMP_SGE: // A >=s 0 -> !A
4576  return BinaryOperator::CreateNot(A);
4577  default:
4578  llvm_unreachable("ICmp i1 X, C not simplified as expected.");
4579  }
4580  } else if (match(B, m_One())) {
4581  switch (I.getPredicate()) {
4582  case CmpInst::ICMP_NE: // A != 1 -> !A
4583  case CmpInst::ICMP_ULT: // A <u 1 -> !A
4584  case CmpInst::ICMP_SGT: // A >s -1 -> !A
4585  return BinaryOperator::CreateNot(A);
4586  default:
4587  llvm_unreachable("ICmp i1 X, C not simplified as expected.");
4588  }
4589  }
4590 
4591  switch (I.getPredicate()) {
4592  default:
4593  llvm_unreachable("Invalid icmp instruction!");
4594  case ICmpInst::ICMP_EQ:
4595  // icmp eq i1 A, B -> ~(A ^ B)
4596  return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
4597 
4598  case ICmpInst::ICMP_NE:
4599  // icmp ne i1 A, B -> A ^ B
4600  return BinaryOperator::CreateXor(A, B);
4601 
4602  case ICmpInst::ICMP_UGT:
4603  // icmp ugt -> icmp ult
4604  std::swap(A, B);
4605  LLVM_FALLTHROUGH;
4606  case ICmpInst::ICMP_ULT:
4607  // icmp ult i1 A, B -> ~A & B
4608  return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
4609 
4610  case ICmpInst::ICMP_SGT:
4611  // icmp sgt -> icmp slt
4612  std::swap(A, B);
4613  LLVM_FALLTHROUGH;
4614  case ICmpInst::ICMP_SLT:
4615  // icmp slt i1 A, B -> A & ~B
4616  return BinaryOperator::CreateAnd(Builder.CreateNot(B), A);
4617 
4618  case ICmpInst::ICMP_UGE:
4619  // icmp uge -> icmp ule
4620  std::swap(A, B);
4621  LLVM_FALLTHROUGH;
4622  case ICmpInst::ICMP_ULE:
4623  // icmp ule i1 A, B -> ~A | B
4624  return BinaryOperator::CreateOr(Builder.CreateNot(A), B);
4625 
4626  case ICmpInst::ICMP_SGE:
4627  // icmp sge -> icmp sle
4628  std::swap(A, B);
4629  LLVM_FALLTHROUGH;
4630  case ICmpInst::ICMP_SLE:
4631  // icmp sle i1 A, B -> A | ~B
4632  return BinaryOperator::CreateOr(Builder.CreateNot(B), A);
4633  }
4634 }
4635 
4636 // Transform pattern like:
4637 // (1 << Y) u<= X or ~(-1 << Y) u< X or ((1 << Y)+(-1)) u< X
4638 // (1 << Y) u> X or ~(-1 << Y) u>= X or ((1 << Y)+(-1)) u>= X
4639 // Into:
4640 // (X l>> Y) != 0
4641 // (X l>> Y) == 0
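// For the first pattern above, an illustrative rewrite:
//   %t = shl i32 1, %Y
//   %c = icmp ule i32 %t, %X
// becomes:
//   %X.highbits = lshr i32 %X, %Y
//   %c = icmp ne i32 %X.highbits, 0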
4642 static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp,
4643  InstCombiner::BuilderTy &Builder) {
4644  ICmpInst::Predicate Pred, NewPred;
4645  Value *X, *Y;
4646  if (match(&Cmp,
4647  m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) {
4648  // We want X to be the icmp's second operand, so swap predicate if it isn't.
4649  if (Cmp.getOperand(0) == X)
4650  Pred = Cmp.getSwappedPredicate();
4651 
4652  switch (Pred) {
4653  case ICmpInst::ICMP_ULE:
4654  NewPred = ICmpInst::ICMP_NE;
4655  break;
4656  case ICmpInst::ICMP_UGT:
4657  NewPred = ICmpInst::ICMP_EQ;
4658  break;
4659  default:
4660  return nullptr;
4661  }
4662  } else if (match(&Cmp, m_c_ICmp(Pred,
4663  m_OneUse(m_CombineOr(
4664  m_Not(m_Shl(m_AllOnes(), m_Value(Y))),
4665  m_Add(m_Shl(m_One(), m_Value(Y)),
4666  m_AllOnes()))),
4667  m_Value(X)))) {
4668  // The variant with 'add' is not canonical (the variant with 'not' is);
4669  // we only see it here because it has extra uses and so can't be canonicalized.
4670 
4671  // We want X to be the icmp's second operand, so swap predicate if it isn't.
4672  if (Cmp.getOperand(0) == X)
4673  Pred = Cmp.getSwappedPredicate();
4674 
4675  switch (Pred) {
4676  case ICmpInst::ICMP_ULT:
4677  NewPred = ICmpInst::ICMP_NE;
4678  break;
4679  case ICmpInst::ICMP_UGE:
4680  NewPred = ICmpInst::ICMP_EQ;
4681  break;
4682  default:
4683  return nullptr;
4684  }
4685  } else
4686  return nullptr;
4687 
4688  Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits");
4689  Constant *Zero = Constant::getNullValue(NewX->getType());
4690  return CmpInst::Create(Instruction::ICmp, NewPred, NewX, Zero);
4691 }
4692 
4693 static Instruction *foldVectorCmp(CmpInst &Cmp,
4694  InstCombiner::BuilderTy &Builder) {
4695  // If both arguments of the cmp are shuffles that use the same mask and
4696  // shuffle within a single vector, move the shuffle after the cmp.
4697  Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1);
4698  Value *V1, *V2;
4699  Constant *M;
4700  if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(M))) &&
4701  match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(M))) &&
4702  V1->getType() == V2->getType() &&
4703  (LHS->hasOneUse() || RHS->hasOneUse())) {
4704  // cmp (shuffle V1, M), (shuffle V2, M) --> shuffle (cmp V1, V2), M
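    // E.g., with a reversing mask (an illustrative sketch):
    //   %s1 = shufflevector <4 x i32> %v1, <4 x i32> undef,
    //                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    //   %s2 = shufflevector <4 x i32> %v2, <4 x i32> undef,
    //                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    //   %c  = icmp eq <4 x i32> %s1, %s2
    // --> one 'icmp eq %v1, %v2' followed by one shuffle of the result.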
4705  CmpInst::Predicate P = Cmp.getPredicate();
4706  Value *NewCmp = isa<ICmpInst>(Cmp) ? Builder.CreateICmp(P, V1, V2)
4707  : Builder.CreateFCmp(P, V1, V2);
4708  return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()), M);
4709  }
4710  return nullptr;
4711 }
4712 
4713 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
4714  bool Changed = false;
4715  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
4716  unsigned Op0Cplxity = getComplexity(Op0);
4717  unsigned Op1Cplxity = getComplexity(Op1);
4718 
4719  /// Order the operands of the compare so that they are listed from most
4720  /// complex to least complex. This puts binary operators before unary
4721  /// operators, and unary operators before constants.
4722  if (Op0Cplxity < Op1Cplxity ||
4723  (Op0Cplxity == Op1Cplxity && swapMayExposeCSEOpportunities(Op0, Op1))) {
4724  I.swapOperands();
4725  std::swap(Op0, Op1);
4726  Changed = true;
4727  }
4728 
4729  if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1,
4730  SQ.getWithInstruction(&I)))
4731  return replaceInstUsesWith(I, V);
4732 
4733  // Comparing -val or val for inequality with zero is the same as just
4734  // comparing val; i.e., abs(val) != 0 -> val != 0
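  // E.g., for a typical abs idiom (an illustrative sketch):
  //   %neg = sub i32 0, %v
  //   %abs = select i1 %is.neg, i32 %neg, i32 %v
  //   %c   = icmp ne i32 %abs, 0   -->   %c = icmp ne i32 %v, 0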
4735  if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) {
4736  Value *Cond, *SelectTrue, *SelectFalse;
4737  if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
4738  m_Value(SelectFalse)))) {
4739  if (Value *V = dyn_castNegVal(SelectTrue)) {
4740  if (V == SelectFalse)
4741  return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
4742  }
4743  else if (Value *V = dyn_castNegVal(SelectFalse)) {
4744  if (V == SelectTrue)
4745  return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
4746  }
4747  }
4748  }
4749 
4750  if (Op0->getType()->isIntOrIntVectorTy(1))
4751  if (Instruction *Res = canonicalizeICmpBool(I, Builder))
4752  return Res;
4753 
4754  if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I))
4755  return NewICmp;
4756 
4757  if (Instruction *Res = foldICmpWithConstant(I))
4758  return Res;
4759 
4760  if (Instruction *Res = foldICmpUsingKnownBits(I))
4761  return Res;
4762 
4763  // Test if the ICmpInst instruction is used exclusively by a select as
4764  // part of a minimum or maximum operation. If so, refrain from doing
4765  // any other folding. This helps out other analyses which understand
4766  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
4767  // and CodeGen. And in this case, at least one of the comparison
4768  // operands has at least one user besides the compare (the select),
4769  // which would often largely negate the benefit of folding anyway.
4770  //
4771  // Do the same for the other patterns recognized by matchSelectPattern.
4772  if (I.hasOneUse())
4773  if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) {
4774  Value *A, *B;
4775  SelectPatternResult SPR = matchSelectPattern(SI, A, B);
4776  if (SPR.Flavor != SPF_UNKNOWN)
4777  return nullptr;
4778  }
4779 
4780  // Do this after checking for min/max to prevent infinite looping.
4781  if (Instruction *Res = foldICmpWithZero(I))
4782  return Res;
4783 
4784  // FIXME: We only do this after checking for min/max to prevent infinite
4785  // looping caused by a reverse canonicalization of these patterns for min/max.
4786  // FIXME: The organization of folds is a mess. These would naturally go into
4787  // canonicalizeCmpWithConstant(), but we can't move all of the above folds
4788  // down here after the min/max restriction.
4789  ICmpInst::Predicate Pred = I.getPredicate();
4790  const APInt *C;
4791  if (match(Op1, m_APInt(C))) {
4792  // For i32: x >u 2147483647 -> x <s 0 -> true if sign bit set
4793  if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) {
4794  Constant *Zero = Constant::getNullValue(Op0->getType());
4795  return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero);
4796  }
4797 
4798  // For i32: x <u 2147483648 -> x >s -1 -> true if sign bit clear
4799  if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) {
4800  Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
4801  return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
4802  }
4803  }
4804 
4805  if (Instruction *Res = foldICmpInstWithConstant(I))
4806  return Res;
4807 
4808  if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
4809  return Res;
4810 
4811  // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
4812  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
4813  if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I))
4814  return NI;
4815  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
4816  if (Instruction *NI = foldGEPICmp(GEP, Op0,
4817  ICmpInst::getSwappedPredicate(I.getPredicate()), I))
4818  return NI;
4819 
4820  // Try to optimize equality comparisons against alloca-based pointers.
4821  if (Op0->getType()->isPointerTy() && I.isEquality()) {
4822  assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
4823  if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
4824  if (Instruction *New = foldAllocaCmp(I, Alloca, Op1))
4825  return New;
4826  if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
4827  if (Instruction *New = foldAllocaCmp(I, Alloca, Op0))
4828  return New;
4829  }
4830 
4831  // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
4832  Value *X;
4833  if (match(Op0, m_BitCast(m_SIToFP(m_Value(X))))) {
4834  // icmp eq (bitcast (sitofp X)), 0 --> icmp eq X, 0
4835  // icmp ne (bitcast (sitofp X)), 0 --> icmp ne X, 0
4836  // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
4837  // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
4838  if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT ||
4839  Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) &&
4840  match(Op1, m_Zero()))
4841  return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
4842 
4843  // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
4844  if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One()))
4845  return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1));
4846 
4847  // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
4848  if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))
4849  return new ICmpInst(Pred, X, ConstantInt::getAllOnesValue(X->getType()));
4850  }
4851 
4852  // Zero-equality checks are preserved through unsigned floating-point casts:
4853  // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
4854  // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
4855  if (match(Op0, m_BitCast(m_UIToFP(m_Value(X)))))
4856  if (I.isEquality() && match(Op1, m_Zero()))
4857  return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
4858 
4859  // Test to see if the operands of the icmp are casted versions of other
4860  // values. If the ptr->ptr cast can be stripped off both arguments, we do so
4861  // now.
4862  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
4863  if (Op0->getType()->isPointerTy() &&
4864  (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
4865  // We keep moving the cast from the left operand over to the right
4866  // operand, where it can often be eliminated completely.
4867  Op0 = CI->getOperand(0);
4868 
4869  // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast,
4870  // so eliminate it as well.
4871  if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
4872  Op1 = CI2->getOperand(0);
4873 
4874  // If Op1 is a constant, we can fold the cast into the constant.
4875  if (Op0->getType() != Op1->getType()) {
4876  if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
4877  Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
4878  } else {
4879  // Otherwise, cast the RHS right before the icmp
4880  Op1 = Builder.CreateBitCast(Op1, Op0->getType());
4881  }
4882  }
4883  return new ICmpInst(I.getPredicate(), Op0, Op1);
4884  }
4885  }
4886 
4887  if (isa<CastInst>(Op0)) {
4888  // Handle the special case of: icmp (cast bool to X), <cst>
4889  // This comes up when you have code like
4890  // int X = A < B;
4891  // if (X) ...
4892  // For generality, we handle any zero-extension of any operand comparison
4893  // with a constant or another cast from the same type.
4894  if (isa<Constant>(Op1) || isa<CastInst>(Op1))
4895  if (Instruction *R = foldICmpWithCastAndCast(I))
4896  return R;
4897  }
4898 
4899  if (Instruction *Res = foldICmpBinOp(I))
4900  return Res;
4901 
4902  if (Instruction *Res = foldICmpWithMinMax(I))
4903  return Res;
4904 
4905  {
4906  Value *A, *B;
4907  // Transform (A & ~B) == 0 --> (A & B) != 0
4908  // and (A & ~B) != 0 --> (A & B) == 0
4909  // if A is a power of 2.
4910  if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
4911  match(Op1, m_Zero()) &&
4912  isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality())
4913  return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B),
4914  Op1);
4915 
4916  // ~X < ~Y --> Y < X
4917  // ~X < C --> X > ~C
4918  if (match(Op0, m_Not(m_Value(A)))) {
4919  if (match(Op1, m_Not(m_Value(B))))
4920  return new ICmpInst(I.getPredicate(), B, A);
4921 
4922  const APInt *C;
4923  if (match(Op1, m_APInt(C)))
4924  return new ICmpInst(I.getSwappedPredicate(), A,
4925  ConstantInt::get(Op1->getType(), ~(*C)));
4926  }
4927 
4928  Instruction *AddI = nullptr;
4929  if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
4930  m_Instruction(AddI))) &&
4931  isa<IntegerType>(A->getType())) {
4932  Value *Result;
4933  Constant *Overflow;
4934  if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
4935  Overflow)) {
4936  replaceInstUsesWith(*AddI, Result);
4937  return replaceInstUsesWith(I, Overflow);
4938  }
4939  }
4940 
4941  // (zext a) * (zext b) --> llvm.umul.with.overflow.
4942  if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
4943  if (Instruction *R = processUMulZExtIdiom(I, Op0, Op1, *this))
4944  return R;
4945  }
4946  if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
4947  if (Instruction *R = processUMulZExtIdiom(I, Op1, Op0, *this))
4948  return R;
4949  }
4950  }
4951 
4952  if (Instruction *Res = foldICmpEquality(I))
4953  return Res;
4954 
4955  // The 'cmpxchg' instruction returns an aggregate containing the old value and
4956  // an i1 which indicates whether or not we successfully did the swap.
4957  //
4958  // Replace comparisons between the old value and the expected value with the
4959  // indicator that 'cmpxchg' returns.
4960  //
4961  // N.B. This transform is only valid when the 'cmpxchg' is not permitted to
4962  // spuriously fail. In those cases, the old value may equal the expected
4963  // value but it is possible for the swap to not occur.
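  // E.g. (an illustrative sketch):
  //   %pair = cmpxchg i32* %p, i32 %expected, i32 %new seq_cst seq_cst
  //   %old  = extractvalue { i32, i1 } %pair, 0
  //   %ok   = icmp eq i32 %old, %expected
  // -->
  //   %ok   = extractvalue { i32, i1 } %pair, 1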
4964  if (I.getPredicate() == ICmpInst::ICMP_EQ)
4965  if (auto *EVI = dyn_cast<ExtractValueInst>(Op0))
4966  if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand()))
4967  if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 &&
4968  !ACXI->isWeak())
4969  return ExtractValueInst::Create(ACXI, 1);
4970 
4971  {
4972  Value *X;
4973  const APInt *C;
4974  // icmp X+Cst, X
4975  if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
4976  return foldICmpAddOpConst(X, *C, I.getPredicate());
4977 
4978  // icmp X, X+Cst
4979  if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X)
4980  return foldICmpAddOpConst(X, *C, I.getSwappedPredicate());
4981  }
4982 
4983  if (Instruction *Res = foldICmpWithHighBitMask(I, Builder))
4984  return Res;
4985 
4986  if (I.getType()->isVectorTy())
4987  if (Instruction *Res = foldVectorCmp(I, Builder))
4988  return Res;
4989 
4990  return Changed ? &I : nullptr;
4991 }
4992 
4993 /// Fold fcmp ([us]itofp x, cst) if possible.
4994 Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
4995  Constant *RHSC) {
4996  if (!isa<ConstantFP>(RHSC)) return nullptr;
4997  const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
4998 
4999  // Get the width of the mantissa. We don't want to hack on conversions that
5000  // might lose information from the integer, e.g. "i64 -> float"
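  // E.g. 'float' has a 24-bit mantissa, so an i64 -> float conversion can
  // change values whose magnitude exceeds 2^24, and comparing as integers
  // would not be equivalent.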
5001  int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
5002  if (MantissaWidth == -1) return nullptr; // Unknown.
5003 
5004  IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
5005 
5006  bool LHSUnsigned = isa<UIToFPInst>(LHSI);
5007 
5008  if (I.isEquality()) {
5009  FCmpInst::Predicate P = I.getPredicate();
5010  bool IsExact = false;
5011  APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned);
5012  RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact);
5013 
5014  // If the floating point constant isn't an integer value, we know the result
5015  // of an equality comparison: eq can never be true and ne is always true.
5016  if (!IsExact) {
5017  // TODO: Can never be -0.0 and other non-representable values
5018  APFloat RHSRoundInt(RHS);
5019  RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven);
5020  if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) {
5021  if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ)
5022  return replaceInstUsesWith(I, Builder.getFalse());
5023 
5024  assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE);
5025  return replaceInstUsesWith(I, Builder.getTrue());
5026  }
5027  }
5028 
5029  // TODO: If the constant is exactly representable, is it always OK to do
5030  // equality compares as integer?
5031  }
5032 
5033  // Check to see that the input is converted from an integer type that is
5034  // small enough to preserve all bits. TODO: check here for "known" sign bits.
5035  // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64, e.g.
5036  unsigned InputSize = IntTy->getScalarSizeInBits();
5037 
5038  // The following test does NOT adjust InputSize downwards for signed inputs,
5039  // because the most negative value still requires all the mantissa bits
5040  // to distinguish it from one less than that value.
5041  if ((int)InputSize > MantissaWidth) {
5042  // Conversion would lose accuracy. Check if loss can impact comparison.
5043  int Exp = ilogb(RHS);
5044  if (Exp == APFloat::IEK_Inf) {
5045  int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics()));
5046  if (MaxExponent < (int)InputSize - !LHSUnsigned)
5047  // Conversion could create infinity.
5048  return nullptr;
5049  } else {
5050  // Note that if RHS is zero or NaN, then Exp is negative
5051  // and the first condition is trivially false.
5052  if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
5053  // Conversion could affect comparison.
5054  return nullptr;