LLVM  4.0.0
InstCombineCalls.cpp
1 //===- InstCombineCalls.cpp -----------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visitCall and visitInvoke functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Twine.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/GlobalVariable.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Intrinsics.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/IR/Metadata.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Statepoint.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/IR/ValueHandle.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Debug.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <cstring>
54 #include <vector>
55 
56 using namespace llvm;
57 using namespace PatternMatch;
58 
59 #define DEBUG_TYPE "instcombine"
60 
61 STATISTIC(NumSimplified, "Number of library calls simplified");
62 
63 /// Return the specified type promoted as it would be to pass through a va_arg
64 /// area.
65 static Type *getPromotedType(Type *Ty) {
66  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
67  if (ITy->getBitWidth() < 32)
68  return Type::getInt32Ty(Ty->getContext());
69  }
70  return Ty;
71 }
72 
73 /// Given an aggregate type which ultimately holds a single scalar element,
74 /// like {{{type}}} or [1 x type], return type.
75 static Type *reduceToSingleValueType(Type *T) {
76  while (!T->isSingleValueType()) {
77  if (StructType *STy = dyn_cast<StructType>(T)) {
78  if (STy->getNumElements() == 1)
79  T = STy->getElementType(0);
80  else
81  break;
82  } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
83  if (ATy->getNumElements() == 1)
84  T = ATy->getElementType();
85  else
86  break;
87  } else
88  break;
89  }
90 
91  return T;
92 }
93 
94 /// Return a constant boolean vector that has true elements in all positions
95 /// where the input constant data vector has an element with the sign bit set.
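/// For example, <4 x i32> <i32 -8, i32 3, i32 -1, i32 0> yields
/// <4 x i1> <i1 true, i1 false, i1 true, i1 false>.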
96 static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
97  SmallVector<Constant *, 32> BoolVec;
98  IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
99  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
100  Constant *Elt = V->getElementAsConstant(I);
101  assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
102  "Unexpected constant data vector element type");
103  bool Sign = V->getElementType()->isIntegerTy()
104  ? cast<ConstantInt>(Elt)->isNegative()
105  : cast<ConstantFP>(Elt)->isNegative();
106  BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
107  }
108  return ConstantVector::get(BoolVec);
109 }
110 
111 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
112  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT);
113  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT);
114  unsigned MinAlign = std::min(DstAlign, SrcAlign);
115  unsigned CopyAlign = MI->getAlignment();
116 
117  if (CopyAlign < MinAlign) {
118  MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false));
119  return MI;
120  }
121 
122  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
123  // load/store.
124  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
125  if (!MemOpLength) return nullptr;
126 
127  // Source and destination pointer types are always "i8*" for the intrinsic. See
128  // if the size is something we can handle with a single primitive load/store.
129  // A single load+store correctly handles overlapping memory in the memmove
130  // case.
131  uint64_t Size = MemOpLength->getLimitedValue();
132  assert(Size && "0-sized memory transferring should be removed already.");
133 
134  if (Size > 8 || (Size&(Size-1)))
135  return nullptr; // If not 1/2/4/8 bytes, exit.
136 
137  // Use an integer load+store unless we can find something better.
138  unsigned SrcAddrSp =
139  cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
140  unsigned DstAddrSp =
141  cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
142 
143  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
144  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
145  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
146 
147  // Memcpy forces the use of i8* for the source and destination. That means
148  // that if you're using memcpy to move one double around, you'll get a cast
149  // from double* to i8*. We'd much rather use a double load+store than an i64
150  // load+store here, because this improves the odds that the source or dest
151  // address will be promotable. See if we can find a better type than the
152  // integer datatype.
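  // For example, an 8-byte memcpy whose stripped destination is really a
  // double* becomes a double load+store instead of an i64 load+store.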
153  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
154  MDNode *CopyMD = nullptr;
155  if (StrippedDest != MI->getArgOperand(0)) {
156  Type *SrcETy = cast<PointerType>(StrippedDest->getType())
157  ->getElementType();
158  if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
159  // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
160  // down through these levels if so.
161  SrcETy = reduceToSingleValueType(SrcETy);
162 
163  if (SrcETy->isSingleValueType()) {
164  NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
165  NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
166 
167  // If the memcpy has metadata describing the members, see if we can
168  // get the TBAA tag describing our copy.
169  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
170  if (M->getNumOperands() == 3 && M->getOperand(0) &&
171  mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
172  mdconst::extract<ConstantInt>(M->getOperand(0))->isNullValue() &&
173  M->getOperand(1) &&
174  mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
175  mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
176  Size &&
177  M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
178  CopyMD = cast<MDNode>(M->getOperand(2));
179  }
180  }
181  }
182  }
183 
184  // If the memcpy/memmove provides better alignment info than we can
185  // infer, use it.
186  SrcAlign = std::max(SrcAlign, CopyAlign);
187  DstAlign = std::max(DstAlign, CopyAlign);
188 
189  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
190  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
191  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
192  L->setAlignment(SrcAlign);
193  if (CopyMD)
194  L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
195  MDNode *LoopMemParallelMD =
196  MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
197  if (LoopMemParallelMD)
198  L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
199 
200  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
201  S->setAlignment(DstAlign);
202  if (CopyMD)
203  S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
204  if (LoopMemParallelMD)
205  S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
206 
207  // Set the size of the copy to 0, it will be deleted on the next iteration.
208  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
209  return MI;
210 }
211 
212 Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
213  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
214  if (MI->getAlignment() < Alignment) {
215  MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
216  Alignment, false));
217  return MI;
218  }
219 
220  // Extract the length and alignment and fill if they are constant.
221  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
222  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
223  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
224  return nullptr;
225  uint64_t Len = LenC->getLimitedValue();
226  Alignment = MI->getAlignment();
227  assert(Len && "0-sized memory setting should be removed already.");
228 
229  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
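  // For example, memset(p, 0xAB, 4) becomes a store of i32 0xABABABAB, with the
  // fill byte replicated across the store width.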
230  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
231  Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
232 
233  Value *Dest = MI->getDest();
234  unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
235  Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
236  Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
237 
238  // For memset an alignment of 0 means the same as an alignment of 1, but not for store.
239  if (Alignment == 0) Alignment = 1;
240 
241  // Extract the fill value and store.
242  uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
243  StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
244  MI->isVolatile());
245  S->setAlignment(Alignment);
246 
247  // Set the size of the copy to 0, it will be deleted on the next iteration.
248  MI->setLength(Constant::getNullValue(LenC->getType()));
249  return MI;
250  }
251 
252  return nullptr;
253 }
254 
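// Fold x86 packed shift-by-scalar/immediate intrinsics (psra/psrl/psll and the
// *i forms) to generic IR shifts when the shift amount is constant. For
// example, a psrai.d by 3 becomes ashr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>,
// and a logical shift by 32 or more folds directly to zero.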
255 static Value *simplifyX86immShift(const IntrinsicInst &II,
256  InstCombiner::BuilderTy &Builder) {
257  bool LogicalShift = false;
258  bool ShiftLeft = false;
259 
260  switch (II.getIntrinsicID()) {
261  default: llvm_unreachable("Unexpected intrinsic!");
262  case Intrinsic::x86_sse2_psra_d:
263  case Intrinsic::x86_sse2_psra_w:
264  case Intrinsic::x86_sse2_psrai_d:
265  case Intrinsic::x86_sse2_psrai_w:
266  case Intrinsic::x86_avx2_psra_d:
267  case Intrinsic::x86_avx2_psra_w:
268  case Intrinsic::x86_avx2_psrai_d:
269  case Intrinsic::x86_avx2_psrai_w:
270  case Intrinsic::x86_avx512_psra_q_128:
271  case Intrinsic::x86_avx512_psrai_q_128:
272  case Intrinsic::x86_avx512_psra_q_256:
273  case Intrinsic::x86_avx512_psrai_q_256:
274  case Intrinsic::x86_avx512_psra_d_512:
275  case Intrinsic::x86_avx512_psra_q_512:
276  case Intrinsic::x86_avx512_psra_w_512:
277  case Intrinsic::x86_avx512_psrai_d_512:
278  case Intrinsic::x86_avx512_psrai_q_512:
279  case Intrinsic::x86_avx512_psrai_w_512:
280  LogicalShift = false; ShiftLeft = false;
281  break;
282  case Intrinsic::x86_sse2_psrl_d:
283  case Intrinsic::x86_sse2_psrl_q:
284  case Intrinsic::x86_sse2_psrl_w:
285  case Intrinsic::x86_sse2_psrli_d:
286  case Intrinsic::x86_sse2_psrli_q:
287  case Intrinsic::x86_sse2_psrli_w:
288  case Intrinsic::x86_avx2_psrl_d:
289  case Intrinsic::x86_avx2_psrl_q:
290  case Intrinsic::x86_avx2_psrl_w:
291  case Intrinsic::x86_avx2_psrli_d:
292  case Intrinsic::x86_avx2_psrli_q:
293  case Intrinsic::x86_avx2_psrli_w:
294  case Intrinsic::x86_avx512_psrl_d_512:
295  case Intrinsic::x86_avx512_psrl_q_512:
296  case Intrinsic::x86_avx512_psrl_w_512:
297  case Intrinsic::x86_avx512_psrli_d_512:
298  case Intrinsic::x86_avx512_psrli_q_512:
299  case Intrinsic::x86_avx512_psrli_w_512:
300  LogicalShift = true; ShiftLeft = false;
301  break;
302  case Intrinsic::x86_sse2_psll_d:
303  case Intrinsic::x86_sse2_psll_q:
304  case Intrinsic::x86_sse2_psll_w:
305  case Intrinsic::x86_sse2_pslli_d:
306  case Intrinsic::x86_sse2_pslli_q:
307  case Intrinsic::x86_sse2_pslli_w:
308  case Intrinsic::x86_avx2_psll_d:
309  case Intrinsic::x86_avx2_psll_q:
310  case Intrinsic::x86_avx2_psll_w:
311  case Intrinsic::x86_avx2_pslli_d:
312  case Intrinsic::x86_avx2_pslli_q:
313  case Intrinsic::x86_avx2_pslli_w:
314  case Intrinsic::x86_avx512_psll_d_512:
315  case Intrinsic::x86_avx512_psll_q_512:
316  case Intrinsic::x86_avx512_psll_w_512:
317  case Intrinsic::x86_avx512_pslli_d_512:
318  case Intrinsic::x86_avx512_pslli_q_512:
319  case Intrinsic::x86_avx512_pslli_w_512:
320  LogicalShift = true; ShiftLeft = true;
321  break;
322  }
323  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
324 
325  // Simplify if count is constant.
326  auto Arg1 = II.getArgOperand(1);
327  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
328  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
329  auto CInt = dyn_cast<ConstantInt>(Arg1);
330  if (!CAZ && !CDV && !CInt)
331  return nullptr;
332 
333  APInt Count(64, 0);
334  if (CDV) {
335  // SSE2/AVX2 use only the first 64 bits of the 128-bit vector
336  // operand to compute the shift amount.
337  auto VT = cast<VectorType>(CDV->getType());
338  unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
339  assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
340  unsigned NumSubElts = 64 / BitWidth;
341 
342  // Concatenate the sub-elements to create the 64-bit value.
343  for (unsigned i = 0; i != NumSubElts; ++i) {
344  unsigned SubEltIdx = (NumSubElts - 1) - i;
345  auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
346  Count = Count.shl(BitWidth);
347  Count |= SubElt->getValue().zextOrTrunc(64);
348  }
349  }
350  else if (CInt)
351  Count = CInt->getValue();
352 
353  auto Vec = II.getArgOperand(0);
354  auto VT = cast<VectorType>(Vec->getType());
355  auto SVT = VT->getElementType();
356  unsigned VWidth = VT->getNumElements();
357  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
358 
359  // If shift-by-zero then just return the original value.
360  if (Count == 0)
361  return Vec;
362 
363  // Handle cases when Shift >= BitWidth.
364  if (Count.uge(BitWidth)) {
365  // If LogicalShift - just return zero.
366  if (LogicalShift)
367  return ConstantAggregateZero::get(VT);
368 
369  // If ArithmeticShift - clamp Shift to (BitWidth - 1).
370  Count = APInt(64, BitWidth - 1);
371  }
372 
373  // Get a constant vector of the same type as the first operand.
374  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
375  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
376 
377  if (ShiftLeft)
378  return Builder.CreateShl(Vec, ShiftVec);
379 
380  if (LogicalShift)
381  return Builder.CreateLShr(Vec, ShiftVec);
382 
383  return Builder.CreateAShr(Vec, ShiftVec);
384 }
385 
386 // Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
387 // Unlike the generic IR shifts, the intrinsics have defined behaviour for out
388 // of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
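// For example, psrlv.d(%x, <i32 1, i32 2, i32 3, i32 4>) becomes
// lshr <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>, since every amount is in range.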
389 static Value *simplifyX86varShift(const IntrinsicInst &II,
390  InstCombiner::BuilderTy &Builder) {
391  bool LogicalShift = false;
392  bool ShiftLeft = false;
393 
394  switch (II.getIntrinsicID()) {
395  default: llvm_unreachable("Unexpected intrinsic!");
396  case Intrinsic::x86_avx2_psrav_d:
397  case Intrinsic::x86_avx2_psrav_d_256:
398  case Intrinsic::x86_avx512_psrav_q_128:
399  case Intrinsic::x86_avx512_psrav_q_256:
400  case Intrinsic::x86_avx512_psrav_d_512:
401  case Intrinsic::x86_avx512_psrav_q_512:
402  case Intrinsic::x86_avx512_psrav_w_128:
403  case Intrinsic::x86_avx512_psrav_w_256:
404  case Intrinsic::x86_avx512_psrav_w_512:
405  LogicalShift = false;
406  ShiftLeft = false;
407  break;
408  case Intrinsic::x86_avx2_psrlv_d:
409  case Intrinsic::x86_avx2_psrlv_d_256:
410  case Intrinsic::x86_avx2_psrlv_q:
411  case Intrinsic::x86_avx2_psrlv_q_256:
412  case Intrinsic::x86_avx512_psrlv_d_512:
413  case Intrinsic::x86_avx512_psrlv_q_512:
414  case Intrinsic::x86_avx512_psrlv_w_128:
415  case Intrinsic::x86_avx512_psrlv_w_256:
416  case Intrinsic::x86_avx512_psrlv_w_512:
417  LogicalShift = true;
418  ShiftLeft = false;
419  break;
420  case Intrinsic::x86_avx2_psllv_d:
421  case Intrinsic::x86_avx2_psllv_d_256:
422  case Intrinsic::x86_avx2_psllv_q:
423  case Intrinsic::x86_avx2_psllv_q_256:
424  case Intrinsic::x86_avx512_psllv_d_512:
425  case Intrinsic::x86_avx512_psllv_q_512:
426  case Intrinsic::x86_avx512_psllv_w_128:
427  case Intrinsic::x86_avx512_psllv_w_256:
428  case Intrinsic::x86_avx512_psllv_w_512:
429  LogicalShift = true;
430  ShiftLeft = true;
431  break;
432  }
433  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
434 
435  // Simplify if all shift amounts are constant/undef.
436  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
437  if (!CShift)
438  return nullptr;
439 
440  auto Vec = II.getArgOperand(0);
441  auto VT = cast<VectorType>(II.getType());
442  auto SVT = VT->getVectorElementType();
443  int NumElts = VT->getNumElements();
444  int BitWidth = SVT->getIntegerBitWidth();
445 
446  // Collect each element's shift amount.
447  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
448  bool AnyOutOfRange = false;
449  SmallVector<int, 8> ShiftAmts;
450  for (int I = 0; I < NumElts; ++I) {
451  auto *CElt = CShift->getAggregateElement(I);
452  if (CElt && isa<UndefValue>(CElt)) {
453  ShiftAmts.push_back(-1);
454  continue;
455  }
456 
457  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
458  if (!COp)
459  return nullptr;
460 
461  // Handle out of range shifts.
462  // If LogicalShift - set to BitWidth (special case).
463  // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
464  APInt ShiftVal = COp->getValue();
465  if (ShiftVal.uge(BitWidth)) {
466  AnyOutOfRange = LogicalShift;
467  ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
468  continue;
469  }
470 
471  ShiftAmts.push_back((int)ShiftVal.getZExtValue());
472  }
473 
474  // If all elements out of range or UNDEF, return vector of zeros/undefs.
475  // ArithmeticShift should only hit this if they are all UNDEF.
476  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
477  if (all_of(ShiftAmts, OutOfRange)) {
478  SmallVector<Constant *, 8> ConstantVec;
479  for (int Idx : ShiftAmts) {
480  if (Idx < 0) {
481  ConstantVec.push_back(UndefValue::get(SVT));
482  } else {
483  assert(LogicalShift && "Logical shift expected");
484  ConstantVec.push_back(ConstantInt::getNullValue(SVT));
485  }
486  }
487  return ConstantVector::get(ConstantVec);
488  }
489 
490  // We can't lower a mix of in-range and out-of-range amounts to generic logical shifts.
491  if (AnyOutOfRange)
492  return nullptr;
493 
494  // Build the shift amount constant vector.
495  SmallVector<Constant *, 8> ShiftVecAmts;
496  for (int Idx : ShiftAmts) {
497  if (Idx < 0)
498  ShiftVecAmts.push_back(UndefValue::get(SVT));
499  else
500  ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
501  }
502  auto ShiftVec = ConstantVector::get(ShiftVecAmts);
503 
504  if (ShiftLeft)
505  return Builder.CreateShl(Vec, ShiftVec);
506 
507  if (LogicalShift)
508  return Builder.CreateLShr(Vec, ShiftVec);
509 
510  return Builder.CreateAShr(Vec, ShiftVec);
511 }
512 
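// Constant-fold x86 movmsk intrinsics by packing the sign bit of each constant
// vector element into the low bits of the scalar result. For example, movmskps
// on <4 x float> <float -1.0, float 1.0, float -2.0, float 0.0> folds to i32 5.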
513 static Value *simplifyX86movmsk(const IntrinsicInst &II,
514  InstCombiner::BuilderTy &Builder) {
515  Value *Arg = II.getArgOperand(0);
516  Type *ResTy = II.getType();
517  Type *ArgTy = Arg->getType();
518 
519  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
520  if (isa<UndefValue>(Arg))
521  return Constant::getNullValue(ResTy);
522 
523  // We can't easily peek through x86_mmx types.
524  if (!ArgTy->isVectorTy())
525  return nullptr;
526 
527  auto *C = dyn_cast<Constant>(Arg);
528  if (!C)
529  return nullptr;
530 
531  // Extract signbits of the vector input and pack into integer result.
532  APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
533  for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
534  auto *COp = C->getAggregateElement(I);
535  if (!COp)
536  return nullptr;
537  if (isa<UndefValue>(COp))
538  continue;
539 
540  auto *CInt = dyn_cast<ConstantInt>(COp);
541  auto *CFp = dyn_cast<ConstantFP>(COp);
542  if (!CInt && !CFp)
543  return nullptr;
544 
545  if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
546  Result.setBit(I);
547  }
548 
549  return Constant::getIntegerValue(ResTy, Result);
550 }
551 
552 static Value *simplifyX86insertps(const IntrinsicInst &II,
553  InstCombiner::BuilderTy &Builder) {
554  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
555  if (!CInt)
556  return nullptr;
557 
558  VectorType *VecTy = cast<VectorType>(II.getType());
559  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
560 
561  // The immediate permute control byte looks like this:
562  // [3:0] - zero mask for each 32-bit lane
563  // [5:4] - select one 32-bit destination lane
564  // [7:6] - select one 32-bit source lane
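  // For example, an immediate of 0x30 (SourceLane 0, DestLane 3, ZMask 0) becomes
  // shufflevector %op0, %op1, <i32 0, i32 1, i32 2, i32 4>.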
565 
566  uint8_t Imm = CInt->getZExtValue();
567  uint8_t ZMask = Imm & 0xf;
568  uint8_t DestLane = (Imm >> 4) & 0x3;
569  uint8_t SourceLane = (Imm >> 6) & 0x3;
570 
571  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
572 
573  // If all zero mask bits are set, this was just a weird way to
574  // generate a zero vector.
575  if (ZMask == 0xf)
576  return ZeroVector;
577 
578  // Initialize by passing all of the first source bits through.
579  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };
580 
581  // We may replace the second operand with the zero vector.
582  Value *V1 = II.getArgOperand(1);
583 
584  if (ZMask) {
585  // If the zero mask is being used with a single input or the zero mask
586  // overrides the destination lane, this is a shuffle with the zero vector.
587  if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
588  (ZMask & (1 << DestLane))) {
589  V1 = ZeroVector;
590  // We may still move 32-bits of the first source vector from one lane
591  // to another.
592  ShuffleMask[DestLane] = SourceLane;
593  // The zero mask may override the previous insert operation.
594  for (unsigned i = 0; i < 4; ++i)
595  if ((ZMask >> i) & 0x1)
596  ShuffleMask[i] = i + 4;
597  } else {
598  // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
599  return nullptr;
600  }
601  } else {
602  // Replace the selected destination lane with the selected source lane.
603  ShuffleMask[DestLane] = SourceLane + 4;
604  }
605 
606  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
607 }
608 
609 /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
610 /// or conversion to a shuffle vector.
611 static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
612  ConstantInt *CILength, ConstantInt *CIIndex,
613  InstCombiner::BuilderTy &Builder) {
614  auto LowConstantHighUndef = [&](uint64_t Val) {
615  Type *IntTy64 = Type::getInt64Ty(II.getContext());
616  Constant *Args[] = {ConstantInt::get(IntTy64, Val),
617  UndefValue::get(IntTy64)};
618  return ConstantVector::get(Args);
619  };
620 
621  // See if we're dealing with constant values.
622  Constant *C0 = dyn_cast<Constant>(Op0);
623  ConstantInt *CI0 =
624  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
625  : nullptr;
626 
627  // Attempt to constant fold.
628  if (CILength && CIIndex) {
629  // From AMD documentation: "The bit index and field length are each six
630  // bits in length; other bits of the field are ignored."
631  APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
632  APInt APLength = CILength->getValue().zextOrTrunc(6);
633 
634  unsigned Index = APIndex.getZExtValue();
635 
636  // From AMD documentation: "a value of zero in the field length is
637  // defined as length of 64".
638  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
639 
640  // From AMD documentation: "If the sum of the bit index + length field
641  // is greater than 64, the results are undefined".
642  unsigned End = Index + Length;
643 
644  // Note that both field index and field length are 8-bit quantities.
645  // Since variables 'Index' and 'Length' are unsigned values
646  // obtained from zero-extending field index and field length
647  // respectively, their sum should never wrap around.
648  if (End > 64)
649  return UndefValue::get(II.getType());
650 
651  // If we are inserting whole bytes, we can convert this to a shuffle.
652  // Lowering can recognize EXTRQI shuffle masks.
653  if ((Length % 8) == 0 && (Index % 8) == 0) {
654  // Convert bit indices to byte indices.
655  Length /= 8;
656  Index /= 8;
657 
658  Type *IntTy8 = Type::getInt8Ty(II.getContext());
659  Type *IntTy32 = Type::getInt32Ty(II.getContext());
660  VectorType *ShufTy = VectorType::get(IntTy8, 16);
661 
662  SmallVector<Constant *, 16> ShuffleMask;
663  for (int i = 0; i != (int)Length; ++i)
664  ShuffleMask.push_back(
665  Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
666  for (int i = Length; i != 8; ++i)
667  ShuffleMask.push_back(
668  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
669  for (int i = 8; i != 16; ++i)
670  ShuffleMask.push_back(UndefValue::get(IntTy32));
671 
672  Value *SV = Builder.CreateShuffleVector(
673  Builder.CreateBitCast(Op0, ShufTy),
674  ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
675  return Builder.CreateBitCast(SV, II.getType());
676  }
677 
678  // Constant Fold - shift Index'th bit to lowest position and mask off
679  // Length bits.
680  if (CI0) {
681  APInt Elt = CI0->getValue();
682  Elt = Elt.lshr(Index).zextOrTrunc(Length);
683  return LowConstantHighUndef(Elt.getZExtValue());
684  }
685 
686  // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
687  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
688  Value *Args[] = {Op0, CILength, CIIndex};
689  Module *M = II.getModule();
690  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
691  return Builder.CreateCall(F, Args);
692  }
693  }
694 
695  // Constant Fold - extraction from zero is always {zero, undef}.
696  if (CI0 && CI0->equalsInt(0))
697  return LowConstantHighUndef(0);
698 
699  return nullptr;
700 }
701 
702 /// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
703 /// folding or conversion to a shuffle vector.
704 static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
705  APInt APLength, APInt APIndex,
706  InstCombiner::BuilderTy &Builder) {
707  // From AMD documentation: "The bit index and field length are each six bits
708  // in length; other bits of the field are ignored."
709  APIndex = APIndex.zextOrTrunc(6);
710  APLength = APLength.zextOrTrunc(6);
711 
712  // Attempt to constant fold.
713  unsigned Index = APIndex.getZExtValue();
714 
715  // From AMD documentation: "a value of zero in the field length is
716  // defined as length of 64".
717  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
718 
719  // From AMD documentation: "If the sum of the bit index + length field
720  // is greater than 64, the results are undefined".
721  unsigned End = Index + Length;
722 
723  // Note that both field index and field length are 8-bit quantities.
724  // Since variables 'Index' and 'Length' are unsigned values
725  // obtained from zero-extending field index and field length
726  // respectively, their sum should never wrap around.
727  if (End > 64)
728  return UndefValue::get(II.getType());
729 
730  // If we are inserting whole bytes, we can convert this to a shuffle.
731  // Lowering can recognize INSERTQI shuffle masks.
732  if ((Length % 8) == 0 && (Index % 8) == 0) {
733  // Convert bit indices to byte indices.
734  Length /= 8;
735  Index /= 8;
736 
737  Type *IntTy8 = Type::getInt8Ty(II.getContext());
738  Type *IntTy32 = Type::getInt32Ty(II.getContext());
739  VectorType *ShufTy = VectorType::get(IntTy8, 16);
740 
741  SmallVector<Constant *, 16> ShuffleMask;
742  for (int i = 0; i != (int)Index; ++i)
743  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
744  for (int i = 0; i != (int)Length; ++i)
745  ShuffleMask.push_back(
746  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
747  for (int i = Index + Length; i != 8; ++i)
748  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
749  for (int i = 8; i != 16; ++i)
750  ShuffleMask.push_back(UndefValue::get(IntTy32));
751 
752  Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
753  Builder.CreateBitCast(Op1, ShufTy),
754  ConstantVector::get(ShuffleMask));
755  return Builder.CreateBitCast(SV, II.getType());
756  }
757 
758  // See if we're dealing with constant values.
759  Constant *C0 = dyn_cast<Constant>(Op0);
760  Constant *C1 = dyn_cast<Constant>(Op1);
761  ConstantInt *CI00 =
762  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
763  : nullptr;
764  ConstantInt *CI10 =
765  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
766  : nullptr;
767 
768  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
769  if (CI00 && CI10) {
770  APInt V00 = CI00->getValue();
771  APInt V10 = CI10->getValue();
772  APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
773  V00 = V00 & ~Mask;
774  V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
775  APInt Val = V00 | V10;
776  Type *IntTy64 = Type::getInt64Ty(II.getContext());
777  Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
778  UndefValue::get(IntTy64)};
779  return ConstantVector::get(Args);
780  }
781 
782  // If we were an INSERTQ call, we'll save demanded elements if we convert to
783  // INSERTQI.
784  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
785  Type *IntTy8 = Type::getInt8Ty(II.getContext());
786  Constant *CILength = ConstantInt::get(IntTy8, Length, false);
787  Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
788 
789  Value *Args[] = {Op0, Op1, CILength, CIIndex};
790  Module *M = II.getModule();
791  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
792  return Builder.CreateCall(F, Args);
793  }
794 
795  return nullptr;
796 }
797 
798 /// Attempt to convert pshufb* to shufflevector if the mask is constant.
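/// For example, a constant mask element of 0x05 selects byte 5 of the source
/// within its 16-byte lane, while 0x80 (sign bit set) produces a zero byte.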
799 static Value *simplifyX86pshufb(const IntrinsicInst &II,
800  InstCombiner::BuilderTy &Builder) {
801  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
802  if (!V)
803  return nullptr;
804 
805  auto *VecTy = cast<VectorType>(II.getType());
806  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
807  unsigned NumElts = VecTy->getNumElements();
808  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
809  "Unexpected number of elements in shuffle mask!");
810 
811  // Construct a shuffle mask from constant integers or UNDEFs.
812  Constant *Indexes[64] = {nullptr};
813 
814  // Each byte in the shuffle control mask forms an index to permute the
815  // corresponding byte in the destination operand.
816  for (unsigned I = 0; I < NumElts; ++I) {
817  Constant *COp = V->getAggregateElement(I);
818  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
819  return nullptr;
820 
821  if (isa<UndefValue>(COp)) {
822  Indexes[I] = UndefValue::get(MaskEltTy);
823  continue;
824  }
825 
826  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
827 
828  // If the most significant bit (bit[7]) of each byte of the shuffle
829  // control mask is set, then zero is written in the result byte.
830  // The zero vector is in the right-hand side of the resulting
831  // shufflevector.
832 
833  // The value of each index for the high 128-bit lane is the least
834  // significant 4 bits of the respective shuffle control byte.
835  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
836  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
837  }
838 
839  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
840  auto V1 = II.getArgOperand(0);
841  auto V2 = Constant::getNullValue(VecTy);
842  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
843 }
844 
845 /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
846 static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
847  InstCombiner::BuilderTy &Builder) {
848  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
849  if (!V)
850  return nullptr;
851 
852  auto *VecTy = cast<VectorType>(II.getType());
853  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
854  unsigned NumElts = VecTy->getVectorNumElements();
855  bool IsPD = VecTy->getScalarType()->isDoubleTy();
856  unsigned NumLaneElts = IsPD ? 2 : 4;
857  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
858 
859  // Construct a shuffle mask from constant integers or UNDEFs.
860  Constant *Indexes[16] = {nullptr};
861 
862  // The intrinsics only read one or two bits, clear the rest.
863  for (unsigned I = 0; I < NumElts; ++I) {
864  Constant *COp = V->getAggregateElement(I);
865  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
866  return nullptr;
867 
868  if (isa<UndefValue>(COp)) {
869  Indexes[I] = UndefValue::get(MaskEltTy);
870  continue;
871  }
872 
873  APInt Index = cast<ConstantInt>(COp)->getValue();
874  Index = Index.zextOrTrunc(32).getLoBits(2);
875 
876  // The PD variants use bit 1 to select the per-lane element index, so
877  // shift down to convert to generic shuffle mask index.
878  if (IsPD)
879  Index = Index.lshr(1);
880 
881  // The _256 variants are a bit trickier since the mask bits always index
882  // into the corresponding 128 half. In order to convert to a generic
883  // shuffle, we have to make that explicit.
884  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
885 
886  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
887  }
888 
889  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
890  auto V1 = II.getArgOperand(0);
891  auto V2 = UndefValue::get(V1->getType());
892  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
893 }
894 
895 /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
896 static Value *simplifyX86vpermv(const IntrinsicInst &II,
897  InstCombiner::BuilderTy &Builder) {
898  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
899  if (!V)
900  return nullptr;
901 
902  auto *VecTy = cast<VectorType>(II.getType());
903  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
904  unsigned Size = VecTy->getNumElements();
905  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
906  "Unexpected shuffle mask size");
907 
908  // Construct a shuffle mask from constant integers or UNDEFs.
909  Constant *Indexes[64] = {nullptr};
910 
911  for (unsigned I = 0; I < Size; ++I) {
912  Constant *COp = V->getAggregateElement(I);
913  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
914  return nullptr;
915 
916  if (isa<UndefValue>(COp)) {
917  Indexes[I] = UndefValue::get(MaskEltTy);
918  continue;
919  }
920 
921  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
922  Index &= Size - 1;
923  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
924  }
925 
926  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
927  auto V1 = II.getArgOperand(0);
928  auto V2 = UndefValue::get(VecTy);
929  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
930 }
931 
932 /// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
933 /// source vectors, unless a zero bit is set. If a zero bit is set,
934 /// then ignore that half of the mask and clear that half of the vector.
935 static Value *simplifyX86vperm2(const IntrinsicInst &II,
936  InstCombiner::BuilderTy &Builder) {
937  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
938  if (!CInt)
939  return nullptr;
940 
941  VectorType *VecTy = cast<VectorType>(II.getType());
942  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
943 
944  // The immediate permute control byte looks like this:
945  // [1:0] - select 128 bits from sources for low half of destination
946  // [2] - ignore
947  // [3] - zero low half of destination
948  // [5:4] - select 128 bits from sources for high half of destination
949  // [6] - ignore
950  // [7] - zero high half of destination
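  // For example, an immediate of 0x31 selects the high half of operand 0 for the
  // low 128 bits and the high half of operand 1 for the high 128 bits, i.e. a
  // shuffle mask of <4, 5, 6, 7, 12, 13, 14, 15> for <8 x float> operands.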
951 
952  uint8_t Imm = CInt->getZExtValue();
953 
954  bool LowHalfZero = Imm & 0x08;
955  bool HighHalfZero = Imm & 0x80;
956 
957  // If both zero mask bits are set, this was just a weird way to
958  // generate a zero vector.
959  if (LowHalfZero && HighHalfZero)
960  return ZeroVector;
961 
962  // If 0 or 1 zero mask bits are set, this is a simple shuffle.
963  unsigned NumElts = VecTy->getNumElements();
964  unsigned HalfSize = NumElts / 2;
965  SmallVector<uint32_t, 8> ShuffleMask(NumElts);
966 
967  // The high bit of the selection field chooses the 1st or 2nd operand.
968  bool LowInputSelect = Imm & 0x02;
969  bool HighInputSelect = Imm & 0x20;
970 
971  // The low bit of the selection field chooses the low or high half
972  // of the selected operand.
973  bool LowHalfSelect = Imm & 0x01;
974  bool HighHalfSelect = Imm & 0x10;
975 
976  // Determine which operand(s) are actually in use for this instruction.
977  Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
978  Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
979 
980  // If needed, replace operands based on zero mask.
981  V0 = LowHalfZero ? ZeroVector : V0;
982  V1 = HighHalfZero ? ZeroVector : V1;
983 
984  // Permute low half of result.
985  unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
986  for (unsigned i = 0; i < HalfSize; ++i)
987  ShuffleMask[i] = StartIndex + i;
988 
989  // Permute high half of result.
990  StartIndex = HighHalfSelect ? HalfSize : 0;
991  StartIndex += NumElts;
992  for (unsigned i = 0; i < HalfSize; ++i)
993  ShuffleMask[i + HalfSize] = StartIndex + i;
994 
995  return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
996 }
997 
998 /// Decode XOP integer vector comparison intrinsics.
999 static Value *simplifyX86vpcom(const IntrinsicInst &II,
1000  InstCombiner::BuilderTy &Builder,
1001  bool IsSigned) {
1002  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
1003  uint64_t Imm = CInt->getZExtValue() & 0x7;
1004  VectorType *VecTy = cast<VectorType>(II.getType());
1005  ICmpInst::Predicate Pred = ICmpInst::ICMP_EQ;
1006 
1007  switch (Imm) {
1008  case 0x0:
1009  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1010  break;
1011  case 0x1:
1012  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1013  break;
1014  case 0x2:
1015  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1016  break;
1017  case 0x3:
1018  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1019  break;
1020  case 0x4:
1021  Pred = ICmpInst::ICMP_EQ; break;
1022  case 0x5:
1023  Pred = ICmpInst::ICMP_NE; break;
1024  case 0x6:
1025  return ConstantInt::getSigned(VecTy, 0); // FALSE
1026  case 0x7:
1027  return ConstantInt::getSigned(VecTy, -1); // TRUE
1028  }
1029 
1030  if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
1031  II.getArgOperand(1)))
1032  return Builder.CreateSExtOrTrunc(Cmp, VecTy);
1033  }
1034  return nullptr;
1035 }
1036 
1037 // Emit a select instruction and appropriate bitcasts to help simplify
1038 // masked intrinsics.
1039 static Value *emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1,
1040  InstCombiner::BuilderTy &Builder) {
1041  unsigned VWidth = Op0->getType()->getVectorNumElements();
1042 
1043  // If the mask is all ones we don't need the select. But we need to check
1044  // only the bits that will be used in case VWidth is less than 8.
1045  if (auto *C = dyn_cast<ConstantInt>(Mask))
1046  if (C->getValue().zextOrTrunc(VWidth).isAllOnesValue())
1047  return Op0;
1048 
1049  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
1050  cast<IntegerType>(Mask->getType())->getBitWidth());
1051  Mask = Builder.CreateBitCast(Mask, MaskTy);
1052 
1053  // If we have less than 8 elements, then the starting mask was an i8 and
1054  // we need to extract down to the right number of elements.
1055  if (VWidth < 8) {
1056  uint32_t Indices[4];
1057  for (unsigned i = 0; i != VWidth; ++i)
1058  Indices[i] = i;
1059  Mask = Builder.CreateShuffleVector(Mask, Mask,
1060  makeArrayRef(Indices, VWidth),
1061  "extract");
1062  }
1063 
1064  return Builder.CreateSelect(Mask, Op0, Op1);
1065 }
1066 
1067 static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
1068  Value *Arg0 = II.getArgOperand(0);
1069  Value *Arg1 = II.getArgOperand(1);
1070 
1071  // fmin(x, x) -> x
1072  if (Arg0 == Arg1)
1073  return Arg0;
1074 
1075  const auto *C1 = dyn_cast<ConstantFP>(Arg1);
1076 
1077  // fmin(x, nan) -> x
1078  if (C1 && C1->isNaN())
1079  return Arg0;
1080 
1081  // Folding undef to the other operand is valid: if undef were NaN we would
1082  // return the other value anyway, and a NaN can only be returned when both
1083  // operands are NaN.
1083  //
1084  // fmin(undef, x) -> x
1085  if (isa<UndefValue>(Arg0))
1086  return Arg1;
1087 
1088  // fmin(x, undef) -> x
1089  if (isa<UndefValue>(Arg1))
1090  return Arg0;
1091 
1092  Value *X = nullptr;
1093  Value *Y = nullptr;
1094  if (II.getIntrinsicID() == Intrinsic::minnum) {
1095  // fmin(x, fmin(x, y)) -> fmin(x, y)
1096  // fmin(y, fmin(x, y)) -> fmin(x, y)
1097  if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
1098  if (Arg0 == X || Arg0 == Y)
1099  return Arg1;
1100  }
1101 
1102  // fmin(fmin(x, y), x) -> fmin(x, y)
1103  // fmin(fmin(x, y), y) -> fmin(x, y)
1104  if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
1105  if (Arg1 == X || Arg1 == Y)
1106  return Arg0;
1107  }
1108 
1109  // TODO: fmin(nnan x, inf) -> x
1110  // TODO: fmin(nnan ninf x, flt_max) -> x
1111  if (C1 && C1->isInfinity()) {
1112  // fmin(x, -inf) -> -inf
1113  if (C1->isNegative())
1114  return Arg1;
1115  }
1116  } else {
1118  // fmax(x, fmax(x, y)) -> fmax(x, y)
1119  // fmax(y, fmax(x, y)) -> fmax(x, y)
1120  if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
1121  if (Arg0 == X || Arg0 == Y)
1122  return Arg1;
1123  }
1124 
1125  // fmax(fmax(x, y), x) -> fmax(x, y)
1126  // fmax(fmax(x, y), y) -> fmax(x, y)
1127  if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
1128  if (Arg1 == X || Arg1 == Y)
1129  return Arg0;
1130  }
1131 
1132  // TODO: fmax(nnan x, -inf) -> x
1133  // TODO: fmax(nnan ninf x, -flt_max) -> x
1134  if (C1 && C1->isInfinity()) {
1135  // fmax(x, inf) -> inf
1136  if (!C1->isNegative())
1137  return Arg1;
1138  }
1139  }
1140  return nullptr;
1141 }
1142 
1143 static bool maskIsAllOneOrUndef(Value *Mask) {
1144  auto *ConstMask = dyn_cast<Constant>(Mask);
1145  if (!ConstMask)
1146  return false;
1147  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1148  return true;
1149  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
1150  ++I) {
1151  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1152  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1153  continue;
1154  return false;
1155  }
1156  return true;
1157 }
1158 
1159 static Value *simplifyMaskedLoad(const IntrinsicInst &II,
1160  InstCombiner::BuilderTy &Builder) {
1161  // If the mask is all ones or undefs, this is a plain vector load of the 1st
1162  // argument.
1163  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
1164  Value *LoadPtr = II.getArgOperand(0);
1165  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
1166  return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
1167  }
1168 
1169  return nullptr;
1170 }
1171 
1172 static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1173  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1174  if (!ConstMask)
1175  return nullptr;
1176 
1177  // If the mask is all zeros, this instruction does nothing.
1178  if (ConstMask->isNullValue())
1179  return IC.eraseInstFromFunction(II);
1180 
1181  // If the mask is all ones, this is a plain vector store of the 1st argument.
1182  if (ConstMask->isAllOnesValue()) {
1183  Value *StorePtr = II.getArgOperand(1);
1184  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
1185  return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
1186  }
1187 
1188  return nullptr;
1189 }
1190 
1191 static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
1192  // If the mask is all zeros, return the "passthru" argument of the gather.
1193  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
1194  if (ConstMask && ConstMask->isNullValue())
1195  return IC.replaceInstUsesWith(II, II.getArgOperand(3));
1196 
1197  return nullptr;
1198 }
1199 
1200 static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
1201  // If the mask is all zeros, a scatter does nothing.
1202  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1203  if (ConstMask && ConstMask->isNullValue())
1204  return IC.eraseInstFromFunction(II);
1205 
1206  return nullptr;
1207 }
1208 
1209 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
1210  assert((II.getIntrinsicID() == Intrinsic::cttz ||
1211  II.getIntrinsicID() == Intrinsic::ctlz) &&
1212  "Expected cttz or ctlz intrinsic");
1213  Value *Op0 = II.getArgOperand(0);
1214  // FIXME: Try to simplify vectors of integers.
1215  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1216  if (!IT)
1217  return nullptr;
1218 
1219  unsigned BitWidth = IT->getBitWidth();
1220  APInt KnownZero(BitWidth, 0);
1221  APInt KnownOne(BitWidth, 0);
1222  IC.computeKnownBits(Op0, KnownZero, KnownOne, 0, &II);
1223 
1224  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
1225  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
1226  unsigned NumMaskBits = IsTZ ? KnownOne.countTrailingZeros()
1227  : KnownOne.countLeadingZeros();
1228  APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits)
1229  : APInt::getHighBitsSet(BitWidth, NumMaskBits);
1230 
1231  // If all bits above (ctlz) or below (cttz) the first known one are known
1232  // zero, this value is constant.
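  // For example, if the known bits of a cttz operand show that bits 0-3 are
  // zero and bit 4 is one, the call folds to the constant 4.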
1233  // FIXME: This should be in InstSimplify because we're replacing an
1234  // instruction with a constant.
1235  if ((Mask & KnownZero) == Mask) {
1236  auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits));
1237  return IC.replaceInstUsesWith(II, C);
1238  }
1239 
1240  // If the input to cttz/ctlz is known to be non-zero,
1241  // then change the 'ZeroIsUndef' parameter to 'true'
1242  // because we know the zero behavior can't affect the result.
1243  if (KnownOne != 0 || isKnownNonZero(Op0, IC.getDataLayout())) {
1244  if (!match(II.getArgOperand(1), m_One())) {
1245  II.setOperand(1, IC.Builder->getTrue());
1246  return &II;
1247  }
1248  }
1249 
1250  return nullptr;
1251 }
1252 
1253 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1254 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1255 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1256 static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
1257  Value *Ptr = II.getOperand(0);
1258  Value *Mask = II.getOperand(1);
1259  Constant *ZeroVec = Constant::getNullValue(II.getType());
1260 
1261  // Special case a zero mask since that's not a ConstantDataVector.
1262  // This masked load instruction creates a zero vector.
1263  if (isa<ConstantAggregateZero>(Mask))
1264  return IC.replaceInstUsesWith(II, ZeroVec);
1265 
1266  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1267  if (!ConstMask)
1268  return nullptr;
1269 
1270  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1271  // to allow target-independent optimizations.
1272 
1273  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1274  // the LLVM intrinsic definition for the pointer argument.
1275  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1276  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
1277  Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec");
1278 
1279  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1280  // on each element's most significant bit (the sign bit).
1281  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1282 
1283  // The pass-through vector for an x86 masked load is a zero vector.
1284  CallInst *NewMaskedLoad =
1285  IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
1286  return IC.replaceInstUsesWith(II, NewMaskedLoad);
1287 }
1288 
1289 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1290 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1291 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1292 static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1293  Value *Ptr = II.getOperand(0);
1294  Value *Mask = II.getOperand(1);
1295  Value *Vec = II.getOperand(2);
1296 
1297  // Special case a zero mask since that's not a ConstantDataVector:
1298  // this masked store instruction does nothing.
1299  if (isa<ConstantAggregateZero>(Mask)) {
1300  IC.eraseInstFromFunction(II);
1301  return true;
1302  }
1303 
1304  // The SSE2 version is too weird (e.g., unaligned but non-temporal) to do
1305  // anything else at this level.
1306  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
1307  return false;
1308 
1309  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1310  if (!ConstMask)
1311  return false;
1312 
1313  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1314  // to allow target-independent optimizations.
1315 
1316  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1317  // the LLVM intrinsic definition for the pointer argument.
1318  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1319  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
1320  Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec");
1321 
1322  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1323  // on each element's most significant bit (the sign bit).
1324  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1325 
1326  IC.Builder->CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
1327 
1328  // 'Replace uses' doesn't work for stores. Erase the original masked store.
1329  IC.eraseInstFromFunction(II);
1330  return true;
1331 }
1332 
1333 // Returns true iff the 2 intrinsics have the same operands, limiting the
1334 // comparison to the first NumOperands.
1335 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
1336  unsigned NumOperands) {
1337  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
1338  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
1339  for (unsigned i = 0; i < NumOperands; i++)
1340  if (I.getArgOperand(i) != E.getArgOperand(i))
1341  return false;
1342  return true;
1343 }
1344 
1345 // Remove trivially empty start/end intrinsic ranges, i.e. a start
1346 // immediately followed by an end (ignoring debuginfo or other
1347 // start/end intrinsics in between). As this handles only the most trivial
1348 // cases, tracking the nesting level is not needed:
1349 //
1350 // call @llvm.foo.start(i1 0) ; &I
1351 // call @llvm.foo.start(i1 0)
1352 // call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
1353 // call @llvm.foo.end(i1 0)
1354 static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
1355  unsigned EndID, InstCombiner &IC) {
1356  assert(I.getIntrinsicID() == StartID &&
1357  "Start intrinsic does not have expected ID");
1358  BasicBlock::iterator BI(I), BE(I.getParent()->end());
1359  for (++BI; BI != BE; ++BI) {
1360  if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
1361  if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
1362  continue;
1363  if (E->getIntrinsicID() == EndID &&
1364  haveSameOperands(I, *E, E->getNumArgOperands())) {
1365  IC.eraseInstFromFunction(*E);
1366  IC.eraseInstFromFunction(I);
1367  return true;
1368  }
1369  }
1370  break;
1371  }
1372 
1373  return false;
1374 }
1375 
1376 Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
1377  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
1378  return nullptr;
1379 }
1380 
1381 Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
1382  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
1383  return nullptr;
1384 }
1385 
1386 /// CallInst simplification. This mostly only handles folding of intrinsic
1387 /// instructions. For normal calls, it allows visitCallSite to do the heavy
1388 /// lifting.
1389 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
1390  auto Args = CI.arg_operands();
1391  if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
1392  &TLI, &DT, &AC))
1393  return replaceInstUsesWith(CI, V);
1394 
1395  if (isFreeCall(&CI, &TLI))
1396  return visitFree(CI);
1397 
1398  // If the caller function is nounwind, mark the call as nounwind, even if the
1399  // callee isn't.
1400  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1401  CI.setDoesNotThrow();
1402  return &CI;
1403  }
1404 
1405  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1406  if (!II) return visitCallSite(&CI);
1407 
1408  // Intrinsics cannot occur in an invoke, so handle them here instead of in
1409  // visitCallSite.
1410  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
1411  bool Changed = false;
1412 
1413  // memmove/cpy/set of zero bytes is a noop.
1414  if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
1415  if (NumBytes->isNullValue())
1416  return eraseInstFromFunction(CI);
1417 
1418  if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
1419  if (CI->getZExtValue() == 1) {
1420  // Replace the instruction with just byte operations. We would
1421  // transform other cases to loads/stores, but we don't know if
1422  // alignment is sufficient.
1423  }
1424  }
1425 
1426  // No other transformations apply to volatile transfers.
1427  if (MI->isVolatile())
1428  return nullptr;
1429 
1430  // If we have a memmove and the source operation is a constant global,
1431  // then the source and dest pointers can't alias, so we can change this
1432  // into a call to memcpy.
1433  if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
1434  if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1435  if (GVSrc->isConstant()) {
1436  Module *M = CI.getModule();
1437  Intrinsic::ID MemCpyID = Intrinsic::memcpy;
1438  Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1439  CI.getArgOperand(1)->getType(),
1440  CI.getArgOperand(2)->getType() };
1441  CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
1442  Changed = true;
1443  }
1444  }
1445 
1446  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1447  // memmove(x,x,size) -> noop.
1448  if (MTI->getSource() == MTI->getDest())
1449  return eraseInstFromFunction(CI);
1450  }
1451 
1452  // If we can determine a pointer alignment that is bigger than currently
1453  // set, update the alignment.
1454  if (isa<MemTransferInst>(MI)) {
1455  if (Instruction *I = SimplifyMemTransfer(MI))
1456  return I;
1457  } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
1458  if (Instruction *I = SimplifyMemSet(MSI))
1459  return I;
1460  }
1461 
1462  if (Changed) return II;
1463  }
1464 
1465  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
1466  unsigned DemandedWidth) {
1467  APInt UndefElts(Width, 0);
1468  APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
1469  return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
1470  };
1471 
1472  switch (II->getIntrinsicID()) {
1473  default: break;
1474  case Intrinsic::objectsize:
1475  if (ConstantInt *N =
1476  lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
1477  return replaceInstUsesWith(CI, N);
1478  return nullptr;
1479 
1480  case Intrinsic::bswap: {
1481  Value *IIOperand = II->getArgOperand(0);
1482  Value *X = nullptr;
1483 
1484  // bswap(bswap(x)) -> x
1485  if (match(IIOperand, m_BSwap(m_Value(X))))
1486  return replaceInstUsesWith(CI, X);
1487 
1488  // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
1489  if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
1490  unsigned C = X->getType()->getPrimitiveSizeInBits() -
1491  IIOperand->getType()->getPrimitiveSizeInBits();
1492  Value *CV = ConstantInt::get(X->getType(), C);
1493  Value *V = Builder->CreateLShr(X, CV);
1494  return new TruncInst(V, IIOperand->getType());
1495  }
1496  break;
1497  }
1498 
1499  case Intrinsic::bitreverse: {
1500  Value *IIOperand = II->getArgOperand(0);
1501  Value *X = nullptr;
1502 
1503  // bitreverse(bitreverse(x)) -> x
1504  if (match(IIOperand, m_Intrinsic<Intrinsic::bitreverse>(m_Value(X))))
1505  return replaceInstUsesWith(CI, X);
1506  break;
1507  }
1508 
1509  case Intrinsic::masked_load:
1510  if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, *Builder))
1511  return replaceInstUsesWith(CI, SimplifiedMaskedOp);
1512  break;
1513  case Intrinsic::masked_store:
1514  return simplifyMaskedStore(*II, *this);
1515  case Intrinsic::masked_gather:
1516  return simplifyMaskedGather(*II, *this);
1517  case Intrinsic::masked_scatter:
1518  return simplifyMaskedScatter(*II, *this);
1519 
1520  case Intrinsic::powi:
1521  if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
1522  // powi(x, 0) -> 1.0
1523  if (Power->isZero())
1524  return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
1525  // powi(x, 1) -> x
1526  if (Power->isOne())
1527  return replaceInstUsesWith(CI, II->getArgOperand(0));
1528  // powi(x, -1) -> 1/x
1529  if (Power->isAllOnesValue())
1530  return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
1531  II->getArgOperand(0));
1532  }
1533  break;
1534 
1535  case Intrinsic::cttz:
1536  case Intrinsic::ctlz:
1537  if (auto *I = foldCttzCtlz(*II, *this))
1538  return I;
1539  break;
1540 
1541  case Intrinsic::uadd_with_overflow:
1542  case Intrinsic::sadd_with_overflow:
1543  case Intrinsic::umul_with_overflow:
1544  case Intrinsic::smul_with_overflow:
1545  if (isa<Constant>(II->getArgOperand(0)) &&
1546  !isa<Constant>(II->getArgOperand(1))) {
1547  // Canonicalize constants into the RHS.
1548  Value *LHS = II->getArgOperand(0);
1549  II->setArgOperand(0, II->getArgOperand(1));
1550  II->setArgOperand(1, LHS);
1551  return II;
1552  }
1553  LLVM_FALLTHROUGH;
1554 
1555  case Intrinsic::usub_with_overflow:
1556  case Intrinsic::ssub_with_overflow: {
1557  OverflowCheckFlavor OCF =
1558  IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
1559  assert(OCF != OCF_INVALID && "unexpected!");
1560 
1561  Value *OperationResult = nullptr;
1562  Constant *OverflowResult = nullptr;
1563  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
1564  *II, OperationResult, OverflowResult))
1565  return CreateOverflowTuple(II, OperationResult, OverflowResult);
1566 
1567  break;
1568  }
1569 
1570  case Intrinsic::minnum:
1571  case Intrinsic::maxnum: {
1572  Value *Arg0 = II->getArgOperand(0);
1573  Value *Arg1 = II->getArgOperand(1);
1574  // Canonicalize constants to the RHS.
1575  if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
1576  II->setArgOperand(0, Arg1);
1577  II->setArgOperand(1, Arg0);
1578  return II;
1579  }
1580  if (Value *V = simplifyMinnumMaxnum(*II))
1581  return replaceInstUsesWith(*II, V);
1582  break;
1583  }
1584  case Intrinsic::fma:
1585  case Intrinsic::fmuladd: {
1586  Value *Src0 = II->getArgOperand(0);
1587  Value *Src1 = II->getArgOperand(1);
1588 
1589  // Canonicalize constants into the RHS.
1590  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
1591  II->setArgOperand(0, Src1);
1592  II->setArgOperand(1, Src0);
1593  std::swap(Src0, Src1);
1594  }
1595 
1596  Value *LHS = nullptr;
1597  Value *RHS = nullptr;
1598 
1599  // fma fneg(x), fneg(y), z -> fma x, y, z
1600  if (match(Src0, m_FNeg(m_Value(LHS))) &&
1601  match(Src1, m_FNeg(m_Value(RHS)))) {
1602  II->setArgOperand(0, LHS);
1603  II->setArgOperand(1, RHS);
1604  return II;
1605  }
1606 
1607  // fma fabs(x), fabs(x), z -> fma x, x, z
1608  if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
1609  match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
1610  II->setArgOperand(0, LHS);
1611  II->setArgOperand(1, RHS);
1612  return II;
1613  }
1614 
1615  // fma x, 1, z -> fadd x, z
1616  if (match(Src1, m_FPOne())) {
1617  Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
1618  RI->copyFastMathFlags(II);
1619  return RI;
1620  }
1621 
1622  break;
1623  }
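  // Worked example (illustrative): since (-a) * (-b) == a * b, a call such as
  //   call float @llvm.fma.f32(float %na, float %nb, float %c)
  // where %na = fsub float -0.0, %a and %nb = fsub float -0.0, %b is rewritten
  // to use %a and %b directly; and fma(%a, 1.0, %c) becomes fadd %a, %c with
  // the original fast-math flags preserved.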
1624  case Intrinsic::fabs: {
1625  Value *Cond;
1626  Constant *LHS, *RHS;
1627  if (match(II->getArgOperand(0),
1628  m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
1629  CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS});
1630  CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS});
1631  return SelectInst::Create(Cond, Call0, Call1);
1632  }
1633 
1634  break;
1635  }
1636  case Intrinsic::cos:
1637  case Intrinsic::amdgcn_cos: {
1638  Value *SrcSrc;
1639  Value *Src = II->getArgOperand(0);
1640  if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
1641  match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
1642  // cos(-x) -> cos(x)
1643  // cos(fabs(x)) -> cos(x)
1644  II->setArgOperand(0, SrcSrc);
1645  return II;
1646  }
1647 
1648  break;
1649  }
1650  case Intrinsic::ppc_altivec_lvx:
1651  case Intrinsic::ppc_altivec_lvxl:
1652  // Turn PPC lvx -> load if the pointer is known aligned.
1653  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
1654  &DT) >= 16) {
1655  Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
1656  PointerType::getUnqual(II->getType()));
1657  return new LoadInst(Ptr);
1658  }
1659  break;
1660  case Intrinsic::ppc_vsx_lxvw4x:
1661  case Intrinsic::ppc_vsx_lxvd2x: {
1662  // Turn PPC VSX loads into normal loads.
1663  Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
1664  PointerType::getUnqual(II->getType()));
1665  return new LoadInst(Ptr, Twine(""), false, 1);
1666  }
1667  case Intrinsic::ppc_altivec_stvx:
1668  case Intrinsic::ppc_altivec_stvxl:
1669  // Turn stvx -> store if the pointer is known aligned.
1670  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
1671  &DT) >= 16) {
1672  Type *OpPtrTy =
1673  PointerType::getUnqual(II->getArgOperand(0)->getType());
1674  Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
1675  return new StoreInst(II->getArgOperand(0), Ptr);
1676  }
1677  break;
1678  case Intrinsic::ppc_vsx_stxvw4x:
1679  case Intrinsic::ppc_vsx_stxvd2x: {
1680  // Turn PPC VSX stores into normal stores.
1681  Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
1682  Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
1683  return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
1684  }
1685  case Intrinsic::ppc_qpx_qvlfs:
1686  // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
1687  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
1688  &DT) >= 16) {
1689  Type *VTy = VectorType::get(Builder->getFloatTy(),
1690  II->getType()->getVectorNumElements());
1691  Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
1692  PointerType::getUnqual(VTy));
1693  Value *Load = Builder->CreateLoad(Ptr);
1694  return new FPExtInst(Load, II->getType());
1695  }
1696  break;
1697  case Intrinsic::ppc_qpx_qvlfd:
1698  // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
1699  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
1700  &DT) >= 32) {
1701  Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
1702  PointerType::getUnqual(II->getType()));
1703  return new LoadInst(Ptr);
1704  }
1705  break;
1706  case Intrinsic::ppc_qpx_qvstfs:
1707  // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
1708  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
1709  &DT) >= 16) {
1710  Type *VTy = VectorType::get(Builder->getFloatTy(),
1711  II->getArgOperand(0)->getType()->getVectorNumElements());
1712  Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
1713  Type *OpPtrTy = PointerType::getUnqual(VTy);
1714  Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
1715  return new StoreInst(TOp, Ptr);
1716  }
1717  break;
1718  case Intrinsic::ppc_qpx_qvstfd:
1719  // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
1720  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
1721  &DT) >= 32) {
1722  Type *OpPtrTy =
1723  PointerType::getUnqual(II->getArgOperand(0)->getType());
1724  Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
1725  return new StoreInst(II->getArgOperand(0), Ptr);
1726  }
1727  break;
1728 
1729  case Intrinsic::x86_vcvtph2ps_128:
1730  case Intrinsic::x86_vcvtph2ps_256: {
1731  auto Arg = II->getArgOperand(0);
1732  auto ArgType = cast<VectorType>(Arg->getType());
1733  auto RetType = cast<VectorType>(II->getType());
1734  unsigned ArgWidth = ArgType->getNumElements();
1735  unsigned RetWidth = RetType->getNumElements();
1736  assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
1737  assert(ArgType->isIntOrIntVectorTy() &&
1738  ArgType->getScalarSizeInBits() == 16 &&
1739  "CVTPH2PS input type should be 16-bit integer vector");
1740  assert(RetType->getScalarType()->isFloatTy() &&
1741  "CVTPH2PS output type should be 32-bit float vector");
1742 
1743  // Constant folding: Convert to generic half to single conversion.
1744  if (isa<ConstantAggregateZero>(Arg))
1745  return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
1746 
1747  if (isa<ConstantDataVector>(Arg)) {
1748  auto VectorHalfAsShorts = Arg;
1749  if (RetWidth < ArgWidth) {
1750  SmallVector<uint32_t, 8> SubVecMask;
1751  for (unsigned i = 0; i != RetWidth; ++i)
1752  SubVecMask.push_back((int)i);
1753  VectorHalfAsShorts = Builder->CreateShuffleVector(
1754  Arg, UndefValue::get(ArgType), SubVecMask);
1755  }
1756 
1757  auto VectorHalfType =
1758  VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
1759  auto VectorHalfs =
1760  Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType);
1761  auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType);
1762  return replaceInstUsesWith(*II, VectorFloats);
1763  }
1764 
1765  // We only use the lowest lanes of the argument.
1766  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
1767  II->setArgOperand(0, V);
1768  return II;
1769  }
1770  break;
1771  }
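  // Illustrative note: for the 128-bit form the argument is <8 x i16> but only
  // four floats are produced, so RetWidth (4) < ArgWidth (8); a constant input
  // is narrowed with a shuffle to its low four lanes, bitcast to <4 x half>,
  // and fpext'd to <4 x float>, letting the usual constant folder finish the
  // conversion.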
1772 
1773  case Intrinsic::x86_sse_cvtss2si:
1774  case Intrinsic::x86_sse_cvtss2si64:
1775  case Intrinsic::x86_sse_cvttss2si:
1776  case Intrinsic::x86_sse_cvttss2si64:
1777  case Intrinsic::x86_sse2_cvtsd2si:
1778  case Intrinsic::x86_sse2_cvtsd2si64:
1779  case Intrinsic::x86_sse2_cvttsd2si:
1780  case Intrinsic::x86_sse2_cvttsd2si64:
1781  case Intrinsic::x86_avx512_vcvtss2si32:
1782  case Intrinsic::x86_avx512_vcvtss2si64:
1783  case Intrinsic::x86_avx512_vcvtss2usi32:
1784  case Intrinsic::x86_avx512_vcvtss2usi64:
1785  case Intrinsic::x86_avx512_vcvtsd2si32:
1786  case Intrinsic::x86_avx512_vcvtsd2si64:
1787  case Intrinsic::x86_avx512_vcvtsd2usi32:
1788  case Intrinsic::x86_avx512_vcvtsd2usi64:
1789  case Intrinsic::x86_avx512_cvttss2si:
1790  case Intrinsic::x86_avx512_cvttss2si64:
1791  case Intrinsic::x86_avx512_cvttss2usi:
1792  case Intrinsic::x86_avx512_cvttss2usi64:
1793  case Intrinsic::x86_avx512_cvttsd2si:
1794  case Intrinsic::x86_avx512_cvttsd2si64:
1795  case Intrinsic::x86_avx512_cvttsd2usi:
1796  case Intrinsic::x86_avx512_cvttsd2usi64: {
1797  // These intrinsics only demand the 0th element of their input vectors. If
1798  // we can simplify the input based on that, do so now.
1799  Value *Arg = II->getArgOperand(0);
1800  unsigned VWidth = Arg->getType()->getVectorNumElements();
1801  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
1802  II->setArgOperand(0, V);
1803  return II;
1804  }
1805  break;
1806  }
1807 
1808  case Intrinsic::x86_mmx_pmovmskb:
1809  case Intrinsic::x86_sse_movmsk_ps:
1810  case Intrinsic::x86_sse2_movmsk_pd:
1811  case Intrinsic::x86_sse2_pmovmskb_128:
1812  case Intrinsic::x86_avx_movmsk_pd_256:
1813  case Intrinsic::x86_avx_movmsk_ps_256:
1814  case Intrinsic::x86_avx2_pmovmskb: {
1815  if (Value *V = simplifyX86movmsk(*II, *Builder))
1816  return replaceInstUsesWith(*II, V);
1817  break;
1818  }
1819 
1820  case Intrinsic::x86_sse_comieq_ss:
1821  case Intrinsic::x86_sse_comige_ss:
1822  case Intrinsic::x86_sse_comigt_ss:
1823  case Intrinsic::x86_sse_comile_ss:
1824  case Intrinsic::x86_sse_comilt_ss:
1825  case Intrinsic::x86_sse_comineq_ss:
1826  case Intrinsic::x86_sse_ucomieq_ss:
1827  case Intrinsic::x86_sse_ucomige_ss:
1828  case Intrinsic::x86_sse_ucomigt_ss:
1829  case Intrinsic::x86_sse_ucomile_ss:
1830  case Intrinsic::x86_sse_ucomilt_ss:
1831  case Intrinsic::x86_sse_ucomineq_ss:
1832  case Intrinsic::x86_sse2_comieq_sd:
1833  case Intrinsic::x86_sse2_comige_sd:
1834  case Intrinsic::x86_sse2_comigt_sd:
1835  case Intrinsic::x86_sse2_comile_sd:
1836  case Intrinsic::x86_sse2_comilt_sd:
1837  case Intrinsic::x86_sse2_comineq_sd:
1838  case Intrinsic::x86_sse2_ucomieq_sd:
1839  case Intrinsic::x86_sse2_ucomige_sd:
1840  case Intrinsic::x86_sse2_ucomigt_sd:
1841  case Intrinsic::x86_sse2_ucomile_sd:
1842  case Intrinsic::x86_sse2_ucomilt_sd:
1843  case Intrinsic::x86_sse2_ucomineq_sd:
1844  case Intrinsic::x86_avx512_vcomi_ss:
1845  case Intrinsic::x86_avx512_vcomi_sd:
1846  case Intrinsic::x86_avx512_mask_cmp_ss:
1847  case Intrinsic::x86_avx512_mask_cmp_sd: {
1848  // These intrinsics only demand the 0th element of their input vectors. If
1849  // we can simplify the input based on that, do so now.
1850  bool MadeChange = false;
1851  Value *Arg0 = II->getArgOperand(0);
1852  Value *Arg1 = II->getArgOperand(1);
1853  unsigned VWidth = Arg0->getType()->getVectorNumElements();
1854  if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
1855  II->setArgOperand(0, V);
1856  MadeChange = true;
1857  }
1858  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
1859  II->setArgOperand(1, V);
1860  MadeChange = true;
1861  }
1862  if (MadeChange)
1863  return II;
1864  break;
1865  }
1866 
1867  case Intrinsic::x86_avx512_mask_add_ps_512:
1868  case Intrinsic::x86_avx512_mask_div_ps_512:
1869  case Intrinsic::x86_avx512_mask_mul_ps_512:
1870  case Intrinsic::x86_avx512_mask_sub_ps_512:
1871  case Intrinsic::x86_avx512_mask_add_pd_512:
1872  case Intrinsic::x86_avx512_mask_div_pd_512:
1873  case Intrinsic::x86_avx512_mask_mul_pd_512:
1874  case Intrinsic::x86_avx512_mask_sub_pd_512:
1875  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
1876  // IR operations.
1877  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
1878  if (R->getValue() == 4) {
1879  Value *Arg0 = II->getArgOperand(0);
1880  Value *Arg1 = II->getArgOperand(1);
1881 
1882  Value *V;
1883  switch (II->getIntrinsicID()) {
1884  default: llvm_unreachable("Case stmts out of sync!");
1885  case Intrinsic::x86_avx512_mask_add_ps_512:
1886  case Intrinsic::x86_avx512_mask_add_pd_512:
1887  V = Builder->CreateFAdd(Arg0, Arg1);
1888  break;
1889  case Intrinsic::x86_avx512_mask_sub_ps_512:
1890  case Intrinsic::x86_avx512_mask_sub_pd_512:
1891  V = Builder->CreateFSub(Arg0, Arg1);
1892  break;
1893  case Intrinsic::x86_avx512_mask_mul_ps_512:
1894  case Intrinsic::x86_avx512_mask_mul_pd_512:
1895  V = Builder->CreateFMul(Arg0, Arg1);
1896  break;
1897  case Intrinsic::x86_avx512_mask_div_ps_512:
1898  case Intrinsic::x86_avx512_mask_div_pd_512:
1899  V = Builder->CreateFDiv(Arg0, Arg1);
1900  break;
1901  }
1902 
1903  // Create a select for the masking.
1904  V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
1905  *Builder);
1906  return replaceInstUsesWith(*II, V);
1907  }
1908  }
1909  break;
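  // Illustrative note: with a constant rounding operand of 4, a call roughly
  // of the form
  //   @llvm.x86.avx512.mask.add.ps.512(%a, %b, %src, %mask, i32 4)
  // becomes a plain fadd of %a and %b followed by the select emitted by
  // emitX86MaskSelect, which picks each lane from the fadd result or from the
  // passthru operand %src according to %mask.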
1910 
1911  case Intrinsic::x86_avx512_mask_add_ss_round:
1912  case Intrinsic::x86_avx512_mask_div_ss_round:
1913  case Intrinsic::x86_avx512_mask_mul_ss_round:
1914  case Intrinsic::x86_avx512_mask_sub_ss_round:
1915  case Intrinsic::x86_avx512_mask_add_sd_round:
1916  case Intrinsic::x86_avx512_mask_div_sd_round:
1917  case Intrinsic::x86_avx512_mask_mul_sd_round:
1918  case Intrinsic::x86_avx512_mask_sub_sd_round:
1919  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
1920  // IR operations.
1921  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
1922  if (R->getValue() == 4) {
1923  // Extract the element as scalars.
1924  Value *Arg0 = II->getArgOperand(0);
1925  Value *Arg1 = II->getArgOperand(1);
1926  Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0);
1927  Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0);
1928 
1929  Value *V;
1930  switch (II->getIntrinsicID()) {
1931  default: llvm_unreachable("Case stmts out of sync!");
1932  case Intrinsic::x86_avx512_mask_add_ss_round:
1933  case Intrinsic::x86_avx512_mask_add_sd_round:
1934  V = Builder->CreateFAdd(LHS, RHS);
1935  break;
1936  case Intrinsic::x86_avx512_mask_sub_ss_round:
1937  case Intrinsic::x86_avx512_mask_sub_sd_round:
1938  V = Builder->CreateFSub(LHS, RHS);
1939  break;
1940  case Intrinsic::x86_avx512_mask_mul_ss_round:
1941  case Intrinsic::x86_avx512_mask_mul_sd_round:
1942  V = Builder->CreateFMul(LHS, RHS);
1943  break;
1944  case Intrinsic::x86_avx512_mask_div_ss_round:
1945  case Intrinsic::x86_avx512_mask_div_sd_round:
1946  V = Builder->CreateFDiv(LHS, RHS);
1947  break;
1948  }
1949 
1950  // Handle the masking aspect of the intrinsic.
1951  Value *Mask = II->getArgOperand(3);
1952  auto *C = dyn_cast<ConstantInt>(Mask);
1953  // We don't need a select if we know the mask bit is a 1.
1954  if (!C || !C->getValue()[0]) {
1955  // Cast the mask to an i1 vector and then extract the lowest element.
1956  auto *MaskTy = VectorType::get(Builder->getInt1Ty(),
1957  cast<IntegerType>(Mask->getType())->getBitWidth());
1958  Mask = Builder->CreateBitCast(Mask, MaskTy);
1959  Mask = Builder->CreateExtractElement(Mask, (uint64_t)0);
1960  // Extract the lowest element from the passthru operand.
1961  Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2),
1962  (uint64_t)0);
1963  V = Builder->CreateSelect(Mask, V, Passthru);
1964  }
1965 
1966  // Insert the result back into the original argument 0.
1967  V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0);
1968 
1969  return replaceInstUsesWith(*II, V);
1970  }
1971  }
1972  LLVM_FALLTHROUGH;
1973 
1974  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
1975  case Intrinsic::x86_avx512_mask_max_ss_round:
1976  case Intrinsic::x86_avx512_mask_min_ss_round:
1977  case Intrinsic::x86_avx512_mask_max_sd_round:
1978  case Intrinsic::x86_avx512_mask_min_sd_round:
1979  case Intrinsic::x86_avx512_mask_vfmadd_ss:
1980  case Intrinsic::x86_avx512_mask_vfmadd_sd:
1981  case Intrinsic::x86_avx512_maskz_vfmadd_ss:
1982  case Intrinsic::x86_avx512_maskz_vfmadd_sd:
1983  case Intrinsic::x86_avx512_mask3_vfmadd_ss:
1984  case Intrinsic::x86_avx512_mask3_vfmadd_sd:
1985  case Intrinsic::x86_avx512_mask3_vfmsub_ss:
1986  case Intrinsic::x86_avx512_mask3_vfmsub_sd:
1987  case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
1988  case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
1989  case Intrinsic::x86_fma_vfmadd_ss:
1990  case Intrinsic::x86_fma_vfmsub_ss:
1991  case Intrinsic::x86_fma_vfnmadd_ss:
1992  case Intrinsic::x86_fma_vfnmsub_ss:
1993  case Intrinsic::x86_fma_vfmadd_sd:
1994  case Intrinsic::x86_fma_vfmsub_sd:
1995  case Intrinsic::x86_fma_vfnmadd_sd:
1996  case Intrinsic::x86_fma_vfnmsub_sd:
1997  case Intrinsic::x86_sse_cmp_ss:
1998  case Intrinsic::x86_sse_min_ss:
1999  case Intrinsic::x86_sse_max_ss:
2000  case Intrinsic::x86_sse2_cmp_sd:
2001  case Intrinsic::x86_sse2_min_sd:
2002  case Intrinsic::x86_sse2_max_sd:
2003  case Intrinsic::x86_sse41_round_ss:
2004  case Intrinsic::x86_sse41_round_sd:
2005  case Intrinsic::x86_xop_vfrcz_ss:
2006  case Intrinsic::x86_xop_vfrcz_sd: {
2007  unsigned VWidth = II->getType()->getVectorNumElements();
2008  APInt UndefElts(VWidth, 0);
2009  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2010  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2011  if (V != II)
2012  return replaceInstUsesWith(*II, V);
2013  return II;
2014  }
2015  break;
2016  }
2017 
2018  // Constant fold ashr( <A x Bi>, Ci ).
2019  // Constant fold lshr( <A x Bi>, Ci ).
2020  // Constant fold shl( <A x Bi>, Ci ).
2021  case Intrinsic::x86_sse2_psrai_d:
2022  case Intrinsic::x86_sse2_psrai_w:
2023  case Intrinsic::x86_avx2_psrai_d:
2024  case Intrinsic::x86_avx2_psrai_w:
2025  case Intrinsic::x86_avx512_psrai_q_128:
2026  case Intrinsic::x86_avx512_psrai_q_256:
2027  case Intrinsic::x86_avx512_psrai_d_512:
2028  case Intrinsic::x86_avx512_psrai_q_512:
2029  case Intrinsic::x86_avx512_psrai_w_512:
2030  case Intrinsic::x86_sse2_psrli_d:
2031  case Intrinsic::x86_sse2_psrli_q:
2032  case Intrinsic::x86_sse2_psrli_w:
2033  case Intrinsic::x86_avx2_psrli_d:
2034  case Intrinsic::x86_avx2_psrli_q:
2035  case Intrinsic::x86_avx2_psrli_w:
2036  case Intrinsic::x86_avx512_psrli_d_512:
2037  case Intrinsic::x86_avx512_psrli_q_512:
2038  case Intrinsic::x86_avx512_psrli_w_512:
2039  case Intrinsic::x86_sse2_pslli_d:
2040  case Intrinsic::x86_sse2_pslli_q:
2041  case Intrinsic::x86_sse2_pslli_w:
2042  case Intrinsic::x86_avx2_pslli_d:
2043  case Intrinsic::x86_avx2_pslli_q:
2044  case Intrinsic::x86_avx2_pslli_w:
2045  case Intrinsic::x86_avx512_pslli_d_512:
2046  case Intrinsic::x86_avx512_pslli_q_512:
2047  case Intrinsic::x86_avx512_pslli_w_512:
2048  if (Value *V = simplifyX86immShift(*II, *Builder))
2049  return replaceInstUsesWith(*II, V);
2050  break;
2051 
2052  case Intrinsic::x86_sse2_psra_d:
2053  case Intrinsic::x86_sse2_psra_w:
2054  case Intrinsic::x86_avx2_psra_d:
2055  case Intrinsic::x86_avx2_psra_w:
2056  case Intrinsic::x86_avx512_psra_q_128:
2057  case Intrinsic::x86_avx512_psra_q_256:
2058  case Intrinsic::x86_avx512_psra_d_512:
2059  case Intrinsic::x86_avx512_psra_q_512:
2060  case Intrinsic::x86_avx512_psra_w_512:
2061  case Intrinsic::x86_sse2_psrl_d:
2062  case Intrinsic::x86_sse2_psrl_q:
2063  case Intrinsic::x86_sse2_psrl_w:
2064  case Intrinsic::x86_avx2_psrl_d:
2065  case Intrinsic::x86_avx2_psrl_q:
2066  case Intrinsic::x86_avx2_psrl_w:
2067  case Intrinsic::x86_avx512_psrl_d_512:
2068  case Intrinsic::x86_avx512_psrl_q_512:
2069  case Intrinsic::x86_avx512_psrl_w_512:
2070  case Intrinsic::x86_sse2_psll_d:
2071  case Intrinsic::x86_sse2_psll_q:
2072  case Intrinsic::x86_sse2_psll_w:
2073  case Intrinsic::x86_avx2_psll_d:
2074  case Intrinsic::x86_avx2_psll_q:
2075  case Intrinsic::x86_avx2_psll_w:
2076  case Intrinsic::x86_avx512_psll_d_512:
2077  case Intrinsic::x86_avx512_psll_q_512:
2078  case Intrinsic::x86_avx512_psll_w_512: {
2079  if (Value *V = simplifyX86immShift(*II, *Builder))
2080  return replaceInstUsesWith(*II, V);
2081 
2082  // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
2083  // operand to compute the shift amount.
2084  Value *Arg1 = II->getArgOperand(1);
2085  assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
2086  "Unexpected packed shift size");
2087  unsigned VWidth = Arg1->getType()->getVectorNumElements();
2088 
2089  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2090  II->setArgOperand(1, V);
2091  return II;
2092  }
2093  break;
2094  }
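  // Illustrative note: for psrl.q the shift-amount operand is <2 x i64>, so
  // VWidth is 2 and only the low element (VWidth / 2 == 1) is demanded;
  // anything that only affects the upper 64 bits of that vector can be
  // simplified away without changing the shift.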
2095 
2096  case Intrinsic::x86_avx2_psllv_d:
2097  case Intrinsic::x86_avx2_psllv_d_256:
2098  case Intrinsic::x86_avx2_psllv_q:
2099  case Intrinsic::x86_avx2_psllv_q_256:
2100  case Intrinsic::x86_avx512_psllv_d_512:
2101  case Intrinsic::x86_avx512_psllv_q_512:
2102  case Intrinsic::x86_avx512_psllv_w_128:
2103  case Intrinsic::x86_avx512_psllv_w_256:
2104  case Intrinsic::x86_avx512_psllv_w_512:
2105  case Intrinsic::x86_avx2_psrav_d:
2106  case Intrinsic::x86_avx2_psrav_d_256:
2107  case Intrinsic::x86_avx512_psrav_q_128:
2108  case Intrinsic::x86_avx512_psrav_q_256:
2109  case Intrinsic::x86_avx512_psrav_d_512:
2110  case Intrinsic::x86_avx512_psrav_q_512:
2111  case Intrinsic::x86_avx512_psrav_w_128:
2112  case Intrinsic::x86_avx512_psrav_w_256:
2113  case Intrinsic::x86_avx512_psrav_w_512:
2114  case Intrinsic::x86_avx2_psrlv_d:
2115  case Intrinsic::x86_avx2_psrlv_d_256:
2116  case Intrinsic::x86_avx2_psrlv_q:
2117  case Intrinsic::x86_avx2_psrlv_q_256:
2118  case Intrinsic::x86_avx512_psrlv_d_512:
2119  case Intrinsic::x86_avx512_psrlv_q_512:
2120  case Intrinsic::x86_avx512_psrlv_w_128:
2121  case Intrinsic::x86_avx512_psrlv_w_256:
2122  case Intrinsic::x86_avx512_psrlv_w_512:
2123  if (Value *V = simplifyX86varShift(*II, *Builder))
2124  return replaceInstUsesWith(*II, V);
2125  break;
2126 
2127  case Intrinsic::x86_sse2_pmulu_dq:
2128  case Intrinsic::x86_sse41_pmuldq:
2129  case Intrinsic::x86_avx2_pmul_dq:
2130  case Intrinsic::x86_avx2_pmulu_dq:
2131  case Intrinsic::x86_avx512_pmul_dq_512:
2132  case Intrinsic::x86_avx512_pmulu_dq_512: {
2133  unsigned VWidth = II->getType()->getVectorNumElements();
2134  APInt UndefElts(VWidth, 0);
2135  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
2136  if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) {
2137  if (V != II)
2138  return replaceInstUsesWith(*II, V);
2139  return II;
2140  }
2141  break;
2142  }
2143 
2144  case Intrinsic::x86_sse41_insertps:
2145  if (Value *V = simplifyX86insertps(*II, *Builder))
2146  return replaceInstUsesWith(*II, V);
2147  break;
2148 
2149  case Intrinsic::x86_sse4a_extrq: {
2150  Value *Op0 = II->getArgOperand(0);
2151  Value *Op1 = II->getArgOperand(1);
2152  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2153  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2154  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2155  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2156  VWidth1 == 16 && "Unexpected operand sizes");
2157 
2158  // See if we're dealing with constant values.
2159  Constant *C1 = dyn_cast<Constant>(Op1);
2160  ConstantInt *CILength =
2161  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
2162  : nullptr;
2163  ConstantInt *CIIndex =
2164  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2165  : nullptr;
2166 
2167  // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
2168  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
2169  return replaceInstUsesWith(*II, V);
2170 
2171  // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
2172  // operands and the lowest 16-bits of the second.
2173  bool MadeChange = false;
2174  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2175  II->setArgOperand(0, V);
2176  MadeChange = true;
2177  }
2178  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2179  II->setArgOperand(1, V);
2180  MadeChange = true;
2181  }
2182  if (MadeChange)
2183  return II;
2184  break;
2185  }
2186 
2187  case Intrinsic::x86_sse4a_extrqi: {
2188  // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
2189  // bits of the lower 64-bits. The upper 64-bits are undefined.
2190  Value *Op0 = II->getArgOperand(0);
2191  unsigned VWidth = Op0->getType()->getVectorNumElements();
2192  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2193  "Unexpected operand size");
2194 
2195  // See if we're dealing with constant values.
2196  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
2197  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
2198 
2199  // Attempt to simplify to a constant or shuffle vector.
2200  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
2201  return replaceInstUsesWith(*II, V);
2202 
2203  // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
2204  // operand.
2205  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2206  II->setArgOperand(0, V);
2207  return II;
2208  }
2209  break;
2210  }
2211 
2212  case Intrinsic::x86_sse4a_insertq: {
2213  Value *Op0 = II->getArgOperand(0);
2214  Value *Op1 = II->getArgOperand(1);
2215  unsigned VWidth = Op0->getType()->getVectorNumElements();
2216  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2217  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2218  Op1->getType()->getVectorNumElements() == 2 &&
2219  "Unexpected operand size");
2220 
2221  // See if we're dealing with constant values.
2222  Constant *C1 = dyn_cast<Constant>(Op1);
2223  ConstantInt *CI11 =
2224  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2225  : nullptr;
2226 
2227  // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
2228  if (CI11) {
2229  const APInt &V11 = CI11->getValue();
2230  APInt Len = V11.zextOrTrunc(6);
2231  APInt Idx = V11.lshr(8).zextOrTrunc(6);
2232  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
2233  return replaceInstUsesWith(*II, V);
2234  }
2235 
2236  // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
2237  // operand.
2238  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2239  II->setArgOperand(0, V);
2240  return II;
2241  }
2242  break;
2243  }
2244 
2245  case Intrinsic::x86_sse4a_insertqi: {
2246  // INSERTQI: Extract lowest Length bits from lower half of second source and
2247  // insert over first source starting at Index bit. The upper 64-bits are
2248  // undefined.
2249  Value *Op0 = II->getArgOperand(0);
2250  Value *Op1 = II->getArgOperand(1);
2251  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2252  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2253  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2254  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2255  VWidth1 == 2 && "Unexpected operand sizes");
2256 
2257  // See if we're dealing with constant values.
2258  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
2259  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
2260 
2261  // Attempt to simplify to a constant or shuffle vector.
2262  if (CILength && CIIndex) {
2263  APInt Len = CILength->getValue().zextOrTrunc(6);
2264  APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2265  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
2266  return replaceInstUsesWith(*II, V);
2267  }
2268 
2269  // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
2270  // operands.
2271  bool MadeChange = false;
2272  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2273  II->setArgOperand(0, V);
2274  MadeChange = true;
2275  }
2276  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2277  II->setArgOperand(1, V);
2278  MadeChange = true;
2279  }
2280  if (MadeChange)
2281  return II;
2282  break;
2283  }
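  // Worked example (illustrative): constant operands Length = 8 and Index = 4
  // truncate to the 6-bit values Len = 8 and Idx = 4, i.e. insert the low 8
  // bits of the second source into the first source starting at bit 4 of its
  // low 64 bits; simplifyX86insertq can then often express this as a constant
  // or a shufflevector.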
2284 
2285  case Intrinsic::x86_sse41_pblendvb:
2286  case Intrinsic::x86_sse41_blendvps:
2287  case Intrinsic::x86_sse41_blendvpd:
2288  case Intrinsic::x86_avx_blendv_ps_256:
2289  case Intrinsic::x86_avx_blendv_pd_256:
2290  case Intrinsic::x86_avx2_pblendvb: {
2291  // Convert blendv* to vector selects if the mask is constant.
2292  // This optimization is convoluted because the intrinsic is defined as
2293  // getting a vector of floats or doubles for the ps and pd versions.
2294  // FIXME: That should be changed.
2295 
2296  Value *Op0 = II->getArgOperand(0);
2297  Value *Op1 = II->getArgOperand(1);
2298  Value *Mask = II->getArgOperand(2);
2299 
2300  // fold (blend A, A, Mask) -> A
2301  if (Op0 == Op1)
2302  return replaceInstUsesWith(CI, Op0);
2303 
2304  // Zero Mask - select 1st argument.
2305  if (isa<ConstantAggregateZero>(Mask))
2306  return replaceInstUsesWith(CI, Op0);
2307 
2308  // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
2309  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2310  Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
2311  return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
2312  }
2313  break;
2314  }
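  // Worked example (illustrative): for blendvps with a constant mask whose
  // lanes are <-1.0, 0.0, -2.0, 0.0>, getNegativeIsTrueBoolVec yields
  // <i1 true, i1 false, i1 true, i1 false>, so the call becomes
  //   select <4 x i1> <true, false, true, false>, %op1, %op0
  // taking lanes 0 and 2 from the second operand and lanes 1 and 3 from the
  // first.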
2315 
2316  case Intrinsic::x86_ssse3_pshuf_b_128:
2317  case Intrinsic::x86_avx2_pshuf_b:
2318  case Intrinsic::x86_avx512_pshuf_b_512:
2319  if (Value *V = simplifyX86pshufb(*II, *Builder))
2320  return replaceInstUsesWith(*II, V);
2321  break;
2322 
2323  case Intrinsic::x86_avx_vpermilvar_ps:
2324  case Intrinsic::x86_avx_vpermilvar_ps_256:
2325  case Intrinsic::x86_avx512_vpermilvar_ps_512:
2326  case Intrinsic::x86_avx_vpermilvar_pd:
2327  case Intrinsic::x86_avx_vpermilvar_pd_256:
2328  case Intrinsic::x86_avx512_vpermilvar_pd_512:
2329  if (Value *V = simplifyX86vpermilvar(*II, *Builder))
2330  return replaceInstUsesWith(*II, V);
2331  break;
2332 
2333  case Intrinsic::x86_avx2_permd:
2334  case Intrinsic::x86_avx2_permps:
2335  if (Value *V = simplifyX86vpermv(*II, *Builder))
2336  return replaceInstUsesWith(*II, V);
2337  break;
2338 
2339  case Intrinsic::x86_avx512_mask_permvar_df_256:
2340  case Intrinsic::x86_avx512_mask_permvar_df_512:
2341  case Intrinsic::x86_avx512_mask_permvar_di_256:
2342  case Intrinsic::x86_avx512_mask_permvar_di_512:
2343  case Intrinsic::x86_avx512_mask_permvar_hi_128:
2344  case Intrinsic::x86_avx512_mask_permvar_hi_256:
2345  case Intrinsic::x86_avx512_mask_permvar_hi_512:
2346  case Intrinsic::x86_avx512_mask_permvar_qi_128:
2347  case Intrinsic::x86_avx512_mask_permvar_qi_256:
2348  case Intrinsic::x86_avx512_mask_permvar_qi_512:
2349  case Intrinsic::x86_avx512_mask_permvar_sf_256:
2350  case Intrinsic::x86_avx512_mask_permvar_sf_512:
2351  case Intrinsic::x86_avx512_mask_permvar_si_256:
2352  case Intrinsic::x86_avx512_mask_permvar_si_512:
2353  if (Value *V = simplifyX86vpermv(*II, *Builder)) {
2354  // We simplified the permuting, now create a select for the masking.
2355  V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
2356  *Builder);
2357  return replaceInstUsesWith(*II, V);
2358  }
2359  break;
2360 
2361  case Intrinsic::x86_avx_vperm2f128_pd_256:
2362  case Intrinsic::x86_avx_vperm2f128_ps_256:
2363  case Intrinsic::x86_avx_vperm2f128_si_256:
2364  case Intrinsic::x86_avx2_vperm2i128:
2365  if (Value *V = simplifyX86vperm2(*II, *Builder))
2366  return replaceInstUsesWith(*II, V);
2367  break;
2368 
2369  case Intrinsic::x86_avx_maskload_ps:
2370  case Intrinsic::x86_avx_maskload_pd:
2371  case Intrinsic::x86_avx_maskload_ps_256:
2372  case Intrinsic::x86_avx_maskload_pd_256:
2373  case Intrinsic::x86_avx2_maskload_d:
2374  case Intrinsic::x86_avx2_maskload_q:
2375  case Intrinsic::x86_avx2_maskload_d_256:
2376  case Intrinsic::x86_avx2_maskload_q_256:
2377  if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
2378  return I;
2379  break;
2380 
2381  case Intrinsic::x86_sse2_maskmov_dqu:
2382  case Intrinsic::x86_avx_maskstore_ps:
2383  case Intrinsic::x86_avx_maskstore_pd:
2384  case Intrinsic::x86_avx_maskstore_ps_256:
2385  case Intrinsic::x86_avx_maskstore_pd_256:
2386  case Intrinsic::x86_avx2_maskstore_d:
2387  case Intrinsic::x86_avx2_maskstore_q:
2388  case Intrinsic::x86_avx2_maskstore_d_256:
2389  case Intrinsic::x86_avx2_maskstore_q_256:
2390  if (simplifyX86MaskedStore(*II, *this))
2391  return nullptr;
2392  break;
2393 
2394  case Intrinsic::x86_xop_vpcomb:
2395  case Intrinsic::x86_xop_vpcomd:
2396  case Intrinsic::x86_xop_vpcomq:
2397  case Intrinsic::x86_xop_vpcomw:
2398  if (Value *V = simplifyX86vpcom(*II, *Builder, true))
2399  return replaceInstUsesWith(*II, V);
2400  break;
2401 
2402  case Intrinsic::x86_xop_vpcomub:
2403  case Intrinsic::x86_xop_vpcomud:
2404  case Intrinsic::x86_xop_vpcomuq:
2405  case Intrinsic::x86_xop_vpcomuw:
2406  if (Value *V = simplifyX86vpcom(*II, *Builder, false))
2407  return replaceInstUsesWith(*II, V);
2408  break;
2409 
2410  case Intrinsic::ppc_altivec_vperm:
2411  // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
2412  // Note that ppc_altivec_vperm has a big-endian bias, so when creating
2413  // a vector shuffle for little endian, we must undo the transformation
2414  // performed on vec_perm in altivec.h. That is, we must complement
2415  // the permutation mask with respect to 31 and reverse the order of
2416  // V1 and V2.
2417  if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
2418  assert(Mask->getType()->getVectorNumElements() == 16 &&
2419  "Bad type for intrinsic!");
2420 
2421  // Check that all of the elements are integer constants or undefs.
2422  bool AllEltsOk = true;
2423  for (unsigned i = 0; i != 16; ++i) {
2424  Constant *Elt = Mask->getAggregateElement(i);
2425  if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
2426  AllEltsOk = false;
2427  break;
2428  }
2429  }
2430 
2431  if (AllEltsOk) {
2432  // Cast the input vectors to byte vectors.
2433  Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
2434  Mask->getType());
2435  Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
2436  Mask->getType());
2437  Value *Result = UndefValue::get(Op0->getType());
2438 
2439  // Only extract each element once.
2440  Value *ExtractedElts[32];
2441  memset(ExtractedElts, 0, sizeof(ExtractedElts));
2442 
2443  for (unsigned i = 0; i != 16; ++i) {
2444  if (isa<UndefValue>(Mask->getAggregateElement(i)))
2445  continue;
2446  unsigned Idx =
2447  cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
2448  Idx &= 31; // Match the hardware behavior.
2449  if (DL.isLittleEndian())
2450  Idx = 31 - Idx;
2451 
2452  if (!ExtractedElts[Idx]) {
2453  Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
2454  Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
2455  ExtractedElts[Idx] =
2456  Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
2457  Builder->getInt32(Idx&15));
2458  }
2459 
2460  // Insert this value into the result vector.
2461  Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
2462  Builder->getInt32(i));
2463  }
2464  return CastInst::Create(Instruction::BitCast, Result, CI.getType());
2465  }
2466  }
2467  break;
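  // Worked example (illustrative, big-endian case): a constant mask byte of 20
  // gives Idx = 20 & 31 = 20, which is >= 16, so byte 20 & 15 = 4 is extracted
  // from the second input vector; on little-endian targets Idx is first
  // complemented (31 - Idx) and the two inputs are swapped, as the comment
  // above describes.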
2468 
2469  case Intrinsic::arm_neon_vld1:
2470  case Intrinsic::arm_neon_vld2:
2471  case Intrinsic::arm_neon_vld3:
2472  case Intrinsic::arm_neon_vld4:
2473  case Intrinsic::arm_neon_vld2lane:
2474  case Intrinsic::arm_neon_vld3lane:
2475  case Intrinsic::arm_neon_vld4lane:
2476  case Intrinsic::arm_neon_vst1:
2477  case Intrinsic::arm_neon_vst2:
2478  case Intrinsic::arm_neon_vst3:
2479  case Intrinsic::arm_neon_vst4:
2480  case Intrinsic::arm_neon_vst2lane:
2481  case Intrinsic::arm_neon_vst3lane:
2482  case Intrinsic::arm_neon_vst4lane: {
2483  unsigned MemAlign =
2484  getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
2485  unsigned AlignArg = II->getNumArgOperands() - 1;
2486  ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
2487  if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
2488  II->setArgOperand(AlignArg,
2489  ConstantInt::get(Type::getInt32Ty(II->getContext()),
2490  MemAlign, false));
2491  return II;
2492  }
2493  break;
2494  }
2495 
2496  case Intrinsic::arm_neon_vmulls:
2497  case Intrinsic::arm_neon_vmullu:
2498  case Intrinsic::aarch64_neon_smull:
2499  case Intrinsic::aarch64_neon_umull: {
2500  Value *Arg0 = II->getArgOperand(0);
2501  Value *Arg1 = II->getArgOperand(1);
2502 
2503  // Handle mul by zero first:
2504  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
2505  return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
2506  }
2507 
2508  // Check for constant LHS & RHS - in this case we just simplify.
2509  bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
2510  II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
2511  VectorType *NewVT = cast<VectorType>(II->getType());
2512  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
2513  if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
2514  CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
2515  CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
2516 
2517  return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
2518  }
2519 
2520  // Couldn't simplify - canonicalize constant to the RHS.
2521  std::swap(Arg0, Arg1);
2522  }
2523 
2524  // Handle mul by one:
2525  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
2526  if (ConstantInt *Splat =
2527  dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
2528  if (Splat->isOne())
2529  return CastInst::CreateIntegerCast(Arg0, II->getType(),
2530  /*isSigned=*/!Zext);
2531 
2532  break;
2533  }
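  // Illustrative note: a multiply by a splat of 1 reduces to a widening cast,
  // e.g. vmulls(<4 x i16> %x, splat 1) becomes sext <4 x i16> %x to <4 x i32>,
  // while the unsigned forms use zext instead.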
2534 
2535  case Intrinsic::amdgcn_rcp: {
2536  if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
2537  const APFloat &ArgVal = C->getValueAPF();
2538  APFloat Val(ArgVal.getSemantics(), 1.0);
2539  APFloat::opStatus Status = Val.divide(ArgVal,
2540  APFloat::rmNearestTiesToEven);
2541  // Only do this if it was exact and therefore not dependent on the
2542  // rounding mode.
2543  if (Status == APFloat::opOK)
2544  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
2545  }
2546 
2547  break;
2548  }
2549  case Intrinsic::amdgcn_frexp_mant:
2550  case Intrinsic::amdgcn_frexp_exp: {
2551  Value *Src = II->getArgOperand(0);
2552  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
2553  int Exp;
2554  APFloat Significand = frexp(C->getValueAPF(), Exp,
2555  APFloat::rmNearestTiesToEven);
2556 
2557  if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
2558  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
2559  Significand));
2560  }
2561 
2562  // Match instruction special case behavior.
2563  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
2564  Exp = 0;
2565 
2566  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
2567  }
2568 
2569  if (isa<UndefValue>(Src))
2570  return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
2571 
2572  break;
2573  }
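  // Worked example (illustrative): 12.0 == 0.75 * 2^4, so frexp_mant(12.0)
  // folds to 0.75 and frexp_exp(12.0) folds to 4; for NaN or infinity the
  // exponent result is forced to 0 to match the hardware instruction.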
2574  case Intrinsic::amdgcn_class: {
2575  enum {
2576  S_NAN = 1 << 0, // Signaling NaN
2577  Q_NAN = 1 << 1, // Quiet NaN
2578  N_INFINITY = 1 << 2, // Negative infinity
2579  N_NORMAL = 1 << 3, // Negative normal
2580  N_SUBNORMAL = 1 << 4, // Negative subnormal
2581  N_ZERO = 1 << 5, // Negative zero
2582  P_ZERO = 1 << 6, // Positive zero
2583  P_SUBNORMAL = 1 << 7, // Positive subnormal
2584  P_NORMAL = 1 << 8, // Positive normal
2585  P_INFINITY = 1 << 9 // Positive infinity
2586  };
2587 
2588  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
2589  N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
2590 
2591  Value *Src0 = II->getArgOperand(0);
2592  Value *Src1 = II->getArgOperand(1);
2593  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
2594  if (!CMask) {
2595  if (isa<UndefValue>(Src0))
2596  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
2597 
2598  if (isa<UndefValue>(Src1))
2599  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
2600  break;
2601  }
2602 
2603  uint32_t Mask = CMask->getZExtValue();
2604 
2605  // If all tests are made, it doesn't matter what the value is.
2606  if ((Mask & FullMask) == FullMask)
2607  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
2608 
2609  if ((Mask & FullMask) == 0)
2610  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
2611 
2612  if (Mask == (S_NAN | Q_NAN)) {
2613  // Equivalent of isnan. Replace with standard fcmp.
2614  Value *FCmp = Builder->CreateFCmpUNO(Src0, Src0);
2615  FCmp->takeName(II);
2616  return replaceInstUsesWith(*II, FCmp);
2617  }
2618 
2619  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
2620  if (!CVal) {
2621  if (isa<UndefValue>(Src0))
2622  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
2623 
2624  // Clamp mask to used bits
2625  if ((Mask & FullMask) != Mask) {
2626  CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
2627  { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
2628  );
2629 
2630  NewCall->takeName(II);
2631  return replaceInstUsesWith(*II, NewCall);
2632  }
2633 
2634  break;
2635  }
2636 
2637  const APFloat &Val = CVal->getValueAPF();
2638 
2639  bool Result =
2640  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
2641  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
2642  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
2643  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
2644  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
2645  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
2646  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
2647  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
2648  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
2649  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
2650 
2651  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
2652  }
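  // Worked example (illustrative): a mask of S_NAN | Q_NAN (0x3) is exactly an
  // isnan test and becomes fcmp uno %x, %x; a mask covering all ten classes
  // (0x3ff) folds to true, and with a constant first operand the whole call
  // folds to the bit test, e.g. class(-0.0, N_ZERO) -> true.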
2653  case Intrinsic::stackrestore: {
2654  // If the save is right next to the restore, remove the restore. This can
2655  // happen when variable allocas are DCE'd.
2656  if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
2657  if (SS->getIntrinsicID() == Intrinsic::stacksave) {
2658  if (&*++SS->getIterator() == II)
2659  return eraseInstFromFunction(CI);
2660  }
2661  }
2662 
2663  // Scan down this block to see if there is another stack restore in the
2664  // same block without an intervening call/alloca.
2665  BasicBlock::iterator BI(II);
2666  TerminatorInst *TI = II->getParent()->getTerminator();
2667  bool CannotRemove = false;
2668  for (++BI; &*BI != TI; ++BI) {
2669  if (isa<AllocaInst>(BI)) {
2670  CannotRemove = true;
2671  break;
2672  }
2673  if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
2674  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
2675  // If there is a stackrestore below this one, remove this one.
2676  if (II->getIntrinsicID() == Intrinsic::stackrestore)
2677  return eraseInstFromFunction(CI);
2678 
2679  // Bail if we cross over an intrinsic with side effects, such as
2680  // llvm.stacksave, llvm.read_register, or llvm.setjmp.
2681  if (II->mayHaveSideEffects()) {
2682  CannotRemove = true;
2683  break;
2684  }
2685  } else {
2686  // If we found a non-intrinsic call, we can't remove the stack
2687  // restore.
2688  CannotRemove = true;
2689  break;
2690  }
2691  }
2692  }
2693 
2694  // If the stack restore is in a return, resume, or unwind block and if there
2695  // are no allocas or calls between the restore and the return, nuke the
2696  // restore.
2697  if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
2698  return eraseInstFromFunction(CI);
2699  break;
2700  }
2701  case Intrinsic::lifetime_start:
2702  // Asan needs to poison memory to detect invalid access which is possible
2703  // even for empty lifetime range.
2704  if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
2705  break;
2706 
2707  if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
2708  Intrinsic::lifetime_end, *this))
2709  return nullptr;
2710  break;
2711  case Intrinsic::assume: {
2712  Value *IIOperand = II->getArgOperand(0);
2713  // Remove an assume if it is immediately followed by an identical assume.
2714  if (match(II->getNextNode(),
2715  m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
2716  return eraseInstFromFunction(CI);
2717 
2718  // Canonicalize assume(a && b) -> assume(a); assume(b);
2719  // Note: New assumption intrinsics created here are registered by
2720  // the InstCombineIRInserter object.
2721  Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
2722  if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
2723  Builder->CreateCall(AssumeIntrinsic, A, II->getName());
2724  Builder->CreateCall(AssumeIntrinsic, B, II->getName());
2725  return eraseInstFromFunction(*II);
2726  }
2727  // assume(!(a || b)) -> assume(!a); assume(!b);
2728  if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
2729  Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A),
2730  II->getName());
2731  Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B),
2732  II->getName());
2733  return eraseInstFromFunction(*II);
2734  }
2735 
2736  // assume( (load addr) != null ) -> add 'nonnull' metadata to load
2737  // (if assume is valid at the load)
2738  CmpInst::Predicate Pred;
2739  Instruction *LHS;
2740  if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
2741  Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
2742  LHS->getType()->isPointerTy() &&
2743  isValidAssumeForContext(II, LHS, &DT)) {
2744  MDNode *MD = MDNode::get(II->getContext(), None);
2745  LHS->setMetadata(LLVMContext::MD_nonnull, MD);
2746  return eraseInstFromFunction(*II);
2747 
2748  // TODO: apply nonnull return attributes to calls and invokes
2749  // TODO: apply range metadata for range check patterns?
2750  }
2751 
2752  // If there is a dominating assume with the same condition as this one,
2753  // then this one is redundant, and should be removed.
2754  APInt KnownZero(1, 0), KnownOne(1, 0);
2755  computeKnownBits(IIOperand, KnownZero, KnownOne, 0, II);
2756  if (KnownOne.isAllOnesValue())
2757  return eraseInstFromFunction(*II);
2758 
2759  // Update the cache of affected values for this assumption (we might be
2760  // here because we just simplified the condition).
2761  AC.updateAffectedValues(II);
2762  break;
2763  }
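  // Illustrative note: assume(%a && %b) is split into assume(%a); assume(%b),
  // and assume(!(%a || %b)) into assume(!%a); assume(!%b). An assume whose
  // condition is already known to be true from a dominating assume (KnownOne
  // comes back all-ones) is simply erased as redundant.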
2764  case Intrinsic::experimental_gc_relocate: {
2765  // Translate facts known about a pointer before relocating into
2766  // facts about the relocate value, while being careful to
2767  // preserve relocation semantics.
2768  Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
2769 
2770  // Remove the relocation if unused, note that this check is required
2771  // to prevent the cases below from looping forever.
2772  if (II->use_empty())
2773  return eraseInstFromFunction(*II);
2774 
2775  // Undef is undef, even after relocation.
2776  // TODO: provide a hook for this in GCStrategy. This is clearly legal for
2777  // most practical collectors, but there was discussion in the review thread
2778  // about whether it was legal for all possible collectors.
2779  if (isa<UndefValue>(DerivedPtr))
2780  // Use undef of gc_relocate's type to replace it.
2781  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
2782 
2783  if (auto *PT = dyn_cast<PointerType>(II->getType())) {
2784  // The relocation of null will be null for most any collector.
2785  // TODO: provide a hook for this in GCStrategy. There might be some
2786  // weird collector this property does not hold for.
2787  if (isa<ConstantPointerNull>(DerivedPtr))
2788  // Use null-pointer of gc_relocate's type to replace it.
2789  return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
2790 
2791  // isKnownNonNull -> nonnull attribute
2792  if (isKnownNonNullAt(DerivedPtr, II, &DT))
2793  II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
2794  }
2795 
2796  // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
2797  // Canonicalize on the type from the uses to the defs
2798 
2799  // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
2800  break;
2801  }
2802  }
2803 
2804  return visitCallSite(II);
2805 }
2806 
2807 // InvokeInst simplification
2808 //
2809 Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
2810  return visitCallSite(&II);
2811 }
2812 
2813 /// If this cast does not affect the value passed through the varargs area, we
2814 /// can eliminate the use of the cast.
2815 static bool isSafeToEliminateVarargsCast(CallSite CS,
2816  const DataLayout &DL,
2817  const CastInst *const CI,
2818  const int ix) {
2819  if (!CI->isLosslessCast())
2820  return false;
2821 
2822  // If this is a GC intrinsic, avoid munging types. We need types for
2823  // statepoint reconstruction in SelectionDAG.
2824  // TODO: This is probably something which should be expanded to all
2825  // intrinsics since the entire point of intrinsics is that
2826  // they are understandable by the optimizer.
2827  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
2828  return false;
2829 
2830  // The size of ByVal or InAlloca arguments is derived from the type, so we
2831  // can't change to a type with a different size. If the size were
2832  // passed explicitly we could avoid this check.
2833  if (!CS.isByValOrInAllocaArgument(ix))
2834  return true;
2835 
2836  Type* SrcTy =
2837  cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
2838  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
2839  if (!SrcTy->isSized() || !DstTy->isSized())
2840  return false;
2841  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
2842  return false;
2843  return true;
2844 }
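// Illustrative note: a lossless pointer bitcast passed through the varargs
// area, e.g. an i8* casted to i32* in a printf-style call, can be stripped so
// the original pointer is passed directly; for byval/inalloca arguments the
// cast is only dropped when both pointee types have the same allocation size,
// since the argument's size is derived from its type.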
2845 
2846 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
2847  if (!CI->getCalledFunction()) return nullptr;
2848 
2849  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
2850  replaceInstUsesWith(*From, With);
2851  };
2852  LibCallSimplifier Simplifier(DL, &TLI, InstCombineRAUW);
2853  if (Value *With = Simplifier.optimizeCall(CI)) {
2854  ++NumSimplified;
2855  return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
2856  }
2857 
2858  return nullptr;
2859 }
2860 
2861 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
2862  // Strip off at most one level of pointer casts, looking for an alloca. This
2863  // is good enough in practice and simpler than handling any number of casts.
2864  Value *Underlying = TrampMem->stripPointerCasts();
2865  if (Underlying != TrampMem &&
2866  (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
2867  return nullptr;
2868  if (!isa<AllocaInst>(Underlying))
2869  return nullptr;
2870 
2871  IntrinsicInst *InitTrampoline = nullptr;
2872  for (User *U : TrampMem->users()) {
2873  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
2874  if (!II)
2875  return nullptr;
2876  if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
2877  if (InitTrampoline)
2878  // More than one init_trampoline writes to this value. Give up.
2879  return nullptr;
2880  InitTrampoline = II;
2881  continue;
2882  }
2883  if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
2884  // Allow any number of calls to adjust.trampoline.
2885  continue;
2886  return nullptr;
2887  }
2888 
2889  // No call to init.trampoline found.
2890  if (!InitTrampoline)
2891  return nullptr;
2892 
2893  // Check that the alloca is being used in the expected way.
2894  if (InitTrampoline->getOperand(0) != TrampMem)
2895  return nullptr;
2896 
2897  return InitTrampoline;
2898 }
2899 
2900 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
2901  Value *TrampMem) {
2902  // Visit all the previous instructions in the basic block, and try to find a
2903  // init.trampoline which has a direct path to the adjust.trampoline.
2904  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
2905  E = AdjustTramp->getParent()->begin();
2906  I != E;) {
2907  Instruction *Inst = &*--I;
2908  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
2909  if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
2910  II->getOperand(0) == TrampMem)
2911  return II;
2912  if (Inst->mayWriteToMemory())
2913  return nullptr;
2914  }
2915  return nullptr;
2916 }
2917 
2918 // Given a call to llvm.adjust.trampoline, find and return the corresponding
2919 // call to llvm.init.trampoline if the call to the trampoline can be optimized
2920 // to a direct call to a function. Otherwise return NULL.
2921 //
2922 static IntrinsicInst *findInitTrampoline(Value *Callee) {
2923  Callee = Callee->stripPointerCasts();
2924  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
2925  if (!AdjustTramp ||
2926  AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
2927  return nullptr;
2928 
2929  Value *TrampMem = AdjustTramp->getOperand(0);
2930 
2931  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
2932  return IT;
2933  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
2934  return IT;
2935  return nullptr;
2936 }
2937 
2938 /// Improvements for call and invoke instructions.
2939 Instruction *InstCombiner::visitCallSite(CallSite CS) {
2940  if (isAllocLikeFn(CS.getInstruction(), &TLI))
2941  return visitAllocSite(*CS.getInstruction());
2942 
2943  bool Changed = false;
2944 
2945  // Mark any parameters that are known to be non-null with the nonnull
2946  // attribute. This is helpful for inlining calls to functions with null
2947  // checks on their arguments.
2948  SmallVector<unsigned, 4> Indices;
2949  unsigned ArgNo = 0;
2950 
2951  for (Value *V : CS.args()) {
2952  if (V->getType()->isPointerTy() &&
2953  !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
2954  isKnownNonNullAt(V, CS.getInstruction(), &DT))
2955  Indices.push_back(ArgNo + 1);
2956  ArgNo++;
2957  }
2958 
2959  assert(ArgNo == CS.arg_size() && "sanity check");
2960 
2961  if (!Indices.empty()) {
2962  AttributeSet AS = CS.getAttributes();
2963  LLVMContext &Ctx = CS.getInstruction()->getContext();
2964  AS = AS.addAttribute(Ctx, Indices,
2965  Attribute::get(Ctx, Attribute::NonNull));
2966  CS.setAttributes(AS);
2967  Changed = true;
2968  }
2969 
2970  // If the callee is a pointer to a function, attempt to move any casts to the
2971  // arguments of the call/invoke.
2972  Value *Callee = CS.getCalledValue();
2973  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
2974  return nullptr;
2975 
2976  if (Function *CalleeF = dyn_cast<Function>(Callee)) {
2977  // Remove the convergent attr on calls when the callee is not convergent.
2978  if (CS.isConvergent() && !CalleeF->isConvergent() &&
2979  !CalleeF->isIntrinsic()) {
2980  DEBUG(dbgs() << "Removing convergent attr from instr "
2981  << CS.getInstruction() << "\n");
2982  CS.setNotConvergent();
2983  return CS.getInstruction();
2984  }
2985 
2986  // If the call and callee calling conventions don't match, this call must
2987  // be unreachable, as the call is undefined.
2988  if (CalleeF->getCallingConv() != CS.getCallingConv() &&
2989  // Only do this for calls to a function with a body. A prototype may
2990  // not actually end up matching the implementation's calling conv for a
2991  // variety of reasons (e.g. it may be written in assembly).
2992  !CalleeF->isDeclaration()) {
2993  Instruction *OldCall = CS.getInstruction();
2994  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
2995  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
2996  OldCall);
2997  // If OldCall does not return void then replaceAllUsesWith undef.
2998  // This allows ValueHandlers and custom metadata to adjust themselves.
2999  if (!OldCall->getType()->isVoidTy())
3000  replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
3001  if (isa<CallInst>(OldCall))
3002  return eraseInstFromFunction(*OldCall);
3003 
3004  // We cannot remove an invoke, because it would change the CFG, just
3005  // change the callee to a null pointer.
3006  cast<InvokeInst>(OldCall)->setCalledFunction(
3007  Constant::getNullValue(CalleeF->getType()));
3008  return nullptr;
3009  }
3010  }
3011 
3012  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
3013  // If CS does not return void then replaceAllUsesWith undef.
3014  // This allows ValueHandlers and custom metadata to adjust themselves.
3015  if (!CS.getInstruction()->getType()->isVoidTy())
3016  replaceInstUsesWith(*CS.getInstruction(),
3017  UndefValue::get(CS.getInstruction()->getType()));
3018 
3019  if (isa<InvokeInst>(CS.getInstruction())) {
3020  // Can't remove an invoke because we cannot change the CFG.
3021  return nullptr;
3022  }
3023 
3024  // This instruction is not reachable, just remove it. We insert a store to
3025  // undef so that we know that this code is not reachable, despite the fact
3026  // that we can't modify the CFG here.
3027  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
3028  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
3029  CS.getInstruction());
3030 
3031  return eraseInstFromFunction(*CS.getInstruction());
3032  }
3033 
3034  if (IntrinsicInst *II = findInitTrampoline(Callee))
3035  return transformCallThroughTrampoline(CS, II);
3036 
3037  PointerType *PTy = cast<PointerType>(Callee->getType());
3038  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
3039  if (FTy->isVarArg()) {
3040  int ix = FTy->getNumParams();
3041  // See if we can optimize any arguments passed through the varargs area of
3042  // the call.
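// Illustrative sketch (hypothetical IR): a lossless pointer cast whose source
// and destination pointee types have the same allocation size, e.g.
//   %c = bitcast i32* %p to float*
//   call void (...) @g(float* %c)
// can be rewritten to pass %p directly, since the value observed through
// va_arg is unchanged.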
3043  for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
3044  E = CS.arg_end(); I != E; ++I, ++ix) {
3045  CastInst *CI = dyn_cast<CastInst>(*I);
3046  if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
3047  *I = CI->getOperand(0);
3048  Changed = true;
3049  }
3050  }
3051  }
3052 
3053  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
3054  // Inline asm calls cannot throw - mark them 'nounwind'.
3055  CS.setDoesNotThrow();
3056  Changed = true;
3057  }
3058 
3059  // Try to optimize the call if possible; we require DataLayout for most of
3060  // this. None of these calls are seen as possibly dead, so go ahead and
3061  // delete the instruction now.
3062  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
3063  Instruction *I = tryOptimizeCall(CI);
3064  // If we changed something, return the result; otherwise, fall through to
3065  // the remaining checks.
3066  if (I) return eraseInstFromFunction(*I);
3067  }
3068 
3069  return Changed ? CS.getInstruction() : nullptr;
3070 }
3071 
3072 /// If the callee is a constexpr cast of a function, attempt to move the cast to
3073 /// the arguments of the call/invoke.
3074 bool InstCombiner::transformConstExprCastCall(CallSite CS) {
3075  auto *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
3076  if (!Callee)
3077  return false;
3078 
3079  // The prototype of a thunk is a lie. Don't directly call such a function.
3080  if (Callee->hasFnAttribute("thunk"))
3081  return false;
3082 
3083  Instruction *Caller = CS.getInstruction();
3084  const AttributeSet &CallerPAL = CS.getAttributes();
3085 
3086  // Okay, this is a cast from a function to a different type. Unless doing so
3087  // would cause a type conversion of one of our arguments, change this call to
3088  // be a direct call with arguments cast to the appropriate types.
3089  //
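// Illustrative sketch (hypothetical IR): a call through a bitcast callee such
// as
//   %r = call i8* bitcast (i32* (i32*)* @f to i8* (i8*)*)(i8* %p)
// can become a direct call, with no-op pointer casts on the argument and on
// the returned value:
//   %a = bitcast i8* %p to i32*
//   %t = call i32* @f(i32* %a)
//   %r = bitcast i32* %t to i8*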
3090  FunctionType *FT = Callee->getFunctionType();
3091  Type *OldRetTy = Caller->getType();
3092  Type *NewRetTy = FT->getReturnType();
3093 
3094  // Check to see if we are changing the return type...
3095  if (OldRetTy != NewRetTy) {
3096 
3097  if (NewRetTy->isStructTy())
3098  return false; // TODO: Handle multiple return values.
3099 
3100  if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
3101  if (Callee->isDeclaration())
3102  return false; // Cannot transform this return value.
3103 
3104  if (!Caller->use_empty() &&
3105  // void -> non-void is handled specially
3106  !NewRetTy->isVoidTy())
3107  return false; // Cannot transform this return value.
3108  }
3109 
3110  if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
3111  AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
3112  if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
3113  return false; // Attribute not compatible with transformed value.
3114  }
3115 
3116  // If the callsite is an invoke instruction, and the return value is used by
3117  // a PHI node in a successor, we cannot change the return type of the call
3118  // because there is no place to put the cast instruction (without breaking
3119  // the critical edge). Bail out in this case.
3120  if (!Caller->use_empty())
3121  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
3122  for (User *U : II->users())
3123  if (PHINode *PN = dyn_cast<PHINode>(U))
3124  if (PN->getParent() == II->getNormalDest() ||
3125  PN->getParent() == II->getUnwindDest())
3126  return false;
3127  }
3128 
3129  unsigned NumActualArgs = CS.arg_size();
3130  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
3131 
3132  // Prevent us turning:
3133  // declare void @takes_i32_inalloca(i32* inalloca)
3134  // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
3135  //
3136  // into:
3137  // call void @takes_i32_inalloca(i32* null)
3138  //
3139  // Similarly, avoid folding away bitcasts of byval calls.
3140  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
3141  Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
3142  return false;
3143 
3144  CallSite::arg_iterator AI = CS.arg_begin();
3145  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
3146  Type *ParamTy = FT->getParamType(i);
3147  Type *ActTy = (*AI)->getType();
3148 
3149  if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
3150  return false; // Cannot transform this parameter value.
3151 
3152  if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
3153  overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
3154  return false; // Attribute not compatible with transformed value.
3155 
3156  if (CS.isInAllocaArgument(i))
3157  return false; // Cannot transform to and from inalloca.
3158 
3159  // If the parameter is passed as a byval argument, then it must have a
3160  // sized type, and the sized type must have the same size as the old type.
3161  if (ParamTy != ActTy &&
3162  CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
3163  Attribute::ByVal)) {
3164  PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
3165  if (!ParamPTy || !ParamPTy->getElementType()->isSized())
3166  return false;
3167 
3168  Type *CurElTy = ActTy->getPointerElementType();
3169  if (DL.getTypeAllocSize(CurElTy) !=
3170  DL.getTypeAllocSize(ParamPTy->getElementType()))
3171  return false;
3172  }
3173  }
3174 
3175  if (Callee->isDeclaration()) {
3176  // Do not delete arguments unless we have a function body.
3177  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
3178  return false;
3179 
3180  // If the callee is just a declaration, don't change the varargsness of the
3181  // call. We don't want to introduce a varargs call where one doesn't
3182  // already exist.
3183  PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
3184  if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
3185  return false;
3186 
3187  // If both the callee and the cast type are varargs, we still have to make
3188  // sure the number of fixed parameters is the same, or we have the same
3189  // ABI issues as if we introduced a varargs call.
3190  if (FT->isVarArg() &&
3191  cast<FunctionType>(APTy->getElementType())->isVarArg() &&
3192  FT->getNumParams() !=
3193  cast<FunctionType>(APTy->getElementType())->getNumParams())
3194  return false;
3195  }
3196 
3197  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
3198  !CallerPAL.isEmpty())
3199  // In this case we have more arguments than the new function type, but we
3200  // won't be dropping them. Check that these extra arguments have attributes
3201  // that are compatible with being a vararg call argument.
3202  for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
3203  unsigned Index = CallerPAL.getSlotIndex(i - 1);
3204  if (Index <= FT->getNumParams())
3205  break;
3206 
3207  // Check if it has an attribute that's incompatible with varargs.
3208  AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
3209  if (PAttrs.hasAttribute(Index, Attribute::StructRet))
3210  return false;
3211  }
3212 
3213 
3214  // Okay, we decided that this is a safe thing to do: go ahead and start
3215  // inserting cast instructions as necessary.
3216  std::vector<Value*> Args;
3217  Args.reserve(NumActualArgs);
3218  SmallVector<AttributeSet, 8> attrVec;
3219  attrVec.reserve(NumCommonArgs);
3220 
3221  // Get any return attributes.
3222  AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
3223 
3224  // If the return value is not being used, the type may not be compatible
3225  // with the existing attributes. Wipe out any problematic attributes.
3226  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
3227 
3228  // Add the new return attributes.
3229  if (RAttrs.hasAttributes())
3230  attrVec.push_back(AttributeSet::get(Caller->getContext(),
3231  AttributeSet::ReturnIndex, RAttrs));
3232 
3233  AI = CS.arg_begin();
3234  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
3235  Type *ParamTy = FT->getParamType(i);
3236 
3237  if ((*AI)->getType() == ParamTy) {
3238  Args.push_back(*AI);
3239  } else {
3240  Args.push_back(Builder->CreateBitOrPointerCast(*AI, ParamTy));
3241  }
3242 
3243  // Add any parameter attributes.
3244  AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
3245  if (PAttrs.hasAttributes())
3246  attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
3247  PAttrs));
3248  }
3249 
3250  // If the function takes more arguments than the call was taking, add them
3251  // now.
3252  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
3253  Args.push_back(Constant::getNullValue(FT->getParamType(i)));
3254 
3255  // Handle any arguments beyond those accepted by the new function type.
3256  if (FT->getNumParams() < NumActualArgs) {
3257  // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
3258  if (FT->isVarArg()) {
3259  // Add all of the arguments in their promoted form to the arg list.
3260  for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
3261  Type *PTy = getPromotedType((*AI)->getType());
3262  if (PTy != (*AI)->getType()) {
3263  // Must promote to pass through va_arg area!
3264  Instruction::CastOps opcode =
3265  CastInst::getCastOpcode(*AI, false, PTy, false);
3266  Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
3267  } else {
3268  Args.push_back(*AI);
3269  }
3270 
3271  // Add any parameter attributes.
3272  AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
3273  if (PAttrs.hasAttributes())
3274  attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
3275  PAttrs));
3276  }
3277  }
3278  }
3279 
3280  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
3281  if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
3282  attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));
3283 
3284  if (NewRetTy->isVoidTy())
3285  Caller->setName(""); // Void type should not have a name.
3286 
3287  const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
3288  attrVec);
3289 
3290  SmallVector<OperandBundleDef, 1> OpBundles;
3291  CS.getOperandBundlesAsDefs(OpBundles);
3292 
3293  Instruction *NC;
3294  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
3295  NC = Builder->CreateInvoke(Callee, II->getNormalDest(), II->getUnwindDest(),
3296  Args, OpBundles);
3297  NC->takeName(II);
3298  cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
3299  cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
3300  } else {
3301  CallInst *CI = cast<CallInst>(Caller);
3302  NC = Builder->CreateCall(Callee, Args, OpBundles);
3303  NC->takeName(CI);
3304  cast<CallInst>(NC)->setTailCallKind(CI->getTailCallKind());
3305  cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
3306  cast<CallInst>(NC)->setAttributes(NewCallerPAL);
3307  }
3308 
3309  // Insert a cast of the return type as necessary.
3310  Value *NV = NC;
3311  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
3312  if (!NV->getType()->isVoidTy()) {
3313  NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
3314  NC->setDebugLoc(Caller->getDebugLoc());
3315 
3316  // If this is an invoke instruction, we should insert it after the first
3317  // non-PHI instruction in the normal successor block.
3318  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
3319  BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
3320  InsertNewInstBefore(NC, *I);
3321  } else {
3322  // Otherwise, it's a call; just insert the cast right after the call.
3323  InsertNewInstBefore(NC, *Caller);
3324  }
3325  Worklist.AddUsersToWorkList(*Caller);
3326  } else {
3327  NV = UndefValue::get(Caller->getType());
3328  }
3329  }
3330 
3331  if (!Caller->use_empty())
3332  replaceInstUsesWith(*Caller, NV);
3333  else if (Caller->hasValueHandle()) {
3334  if (OldRetTy == NV->getType())
3335  ValueHandleBase::ValueIsRAUWd(Caller, NV);
3336  else
3337  // We cannot call ValueIsRAUWd with a different type, and the
3338  // actual tracked value will disappear.
3339  ValueHandleBase::ValueIsDeleted(Caller);
3340  }
3341 
3342  eraseInstFromFunction(*Caller);
3343  return true;
3344 }
3345 
3346 /// Turn a call to a function created by init_trampoline / adjust_trampoline
3347 /// intrinsic pair into a direct call to the underlying function.
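/// Illustrative sketch (hypothetical IR, assuming @f takes an i8* 'nest'
/// chain as its first parameter): given
///   call void @llvm.init.trampoline(i8* %tramp,
///                                   i8* bitcast (void (i8*, i32)* @f to i8*),
///                                   i8* %chain)
///   %p = call i8* @llvm.adjust.trampoline(i8* %tramp)
///   %fp = bitcast i8* %p to void (i32)*
///   call void %fp(i32 7)
/// the final call is rewritten as a direct call with the chain spliced in:
///   call void @f(i8* nest %chain, i32 7)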
3348 Instruction *
3349 InstCombiner::transformCallThroughTrampoline(CallSite CS,
3350  IntrinsicInst *Tramp) {
3351  Value *Callee = CS.getCalledValue();
3352  PointerType *PTy = cast<PointerType>(Callee->getType());
3353  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
3354  const AttributeSet &Attrs = CS.getAttributes();
3355 
3356  // If the call already has the 'nest' attribute somewhere then give up -
3357  // otherwise 'nest' would occur twice after splicing in the chain.
3358  if (Attrs.hasAttrSomewhere(Attribute::Nest))
3359  return nullptr;
3360 
3361  assert(Tramp &&
3362  "transformCallThroughTrampoline called with incorrect CallSite.");
3363 
3364  Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
3365  FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
3366 
3367  const AttributeSet &NestAttrs = NestF->getAttributes();
3368  if (!NestAttrs.isEmpty()) {
3369  unsigned NestIdx = 1;
3370  Type *NestTy = nullptr;
3371  AttributeSet NestAttr;
3372 
3373  // Look for a parameter marked with the 'nest' attribute.
3374  for (FunctionType::param_iterator I = NestFTy->param_begin(),
3375  E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
3376  if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
3377  // Record the parameter type and any other attributes.
3378  NestTy = *I;
3379  NestAttr = NestAttrs.getParamAttributes(NestIdx);
3380  break;
3381  }
3382 
3383  if (NestTy) {
3384  Instruction *Caller = CS.getInstruction();
3385  std::vector<Value*> NewArgs;
3386  NewArgs.reserve(CS.arg_size() + 1);
3387 
3388  SmallVector<AttributeSet, 8> NewAttrs;
3389  NewAttrs.reserve(Attrs.getNumSlots() + 1);
3390 
3391  // Insert the nest argument into the call argument list, which may
3392  // mean appending it. Likewise for attributes.
3393 
3394  // Add any result attributes.
3395  if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
3396  NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
3397  Attrs.getRetAttributes()));
3398 
3399  {
3400  unsigned Idx = 1;
3401  CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
3402  do {
3403  if (Idx == NestIdx) {
3404  // Add the chain argument and attributes.
3405  Value *NestVal = Tramp->getArgOperand(2);
3406  if (NestVal->getType() != NestTy)
3407  NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
3408  NewArgs.push_back(NestVal);
3409  NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
3410  NestAttr));
3411  }
3412 
3413  if (I == E)
3414  break;
3415 
3416  // Add the original argument and attributes.
3417  NewArgs.push_back(*I);
3418  AttributeSet Attr = Attrs.getParamAttributes(Idx);
3419  if (Attr.hasAttributes(Idx)) {
3420  AttrBuilder B(Attr, Idx);
3421  NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
3422  Idx + (Idx >= NestIdx), B));
3423  }
3424 
3425  ++Idx;
3426  ++I;
3427  } while (true);
3428  }
3429 
3430  // Add any function attributes.
3431  if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
3432  NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
3433  Attrs.getFnAttributes()));
3434 
3435  // The trampoline may have been bitcast to a bogus type (FTy).
3436  // Handle this by synthesizing a new function type, equal to FTy
3437  // with the chain parameter inserted.
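// Illustrative sketch: if FTy is void (i32) and the nest parameter is an i8*
// at index 1, the synthesized NewFTy below is void (i8*, i32).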
3438 
3439  std::vector<Type*> NewTypes;
3440  NewTypes.reserve(FTy->getNumParams()+1);
3441 
3442  // Insert the chain's type into the list of parameter types, which may
3443  // mean appending it.
3444  {
3445  unsigned Idx = 1;
3446  FunctionType::param_iterator I = FTy->param_begin(),
3447  E = FTy->param_end();
3448 
3449  do {
3450  if (Idx == NestIdx)
3451  // Add the chain's type.
3452  NewTypes.push_back(NestTy);
3453 
3454  if (I == E)
3455  break;
3456 
3457  // Add the original type.
3458  NewTypes.push_back(*I);
3459 
3460  ++Idx;
3461  ++I;
3462  } while (true);
3463  }
3464 
3465  // Replace the trampoline call with a direct call. Let the generic
3466  // code sort out any function type mismatches.
3467  FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
3468  FTy->isVarArg());
3469  Constant *NewCallee =
3470  NestF->getType() == PointerType::getUnqual(NewFTy) ?
3471  NestF : ConstantExpr::getBitCast(NestF,
3472  PointerType::getUnqual(NewFTy));
3473  const AttributeSet &NewPAL =
3474  AttributeSet::get(FTy->getContext(), NewAttrs);
3475 
3477  CS.getOperandBundlesAsDefs(OpBundles);
3478 
3479  Instruction *NewCaller;
3480  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
3481  NewCaller = InvokeInst::Create(NewCallee,
3482  II->getNormalDest(), II->getUnwindDest(),
3483  NewArgs, OpBundles);
3484  cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
3485  cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
3486  } else {
3487  NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
3488  cast<CallInst>(NewCaller)->setTailCallKind(
3489  cast<CallInst>(Caller)->getTailCallKind());
3490  cast<CallInst>(NewCaller)->setCallingConv(
3491  cast<CallInst>(Caller)->getCallingConv());
3492  cast<CallInst>(NewCaller)->setAttributes(NewPAL);
3493  }
3494 
3495  return NewCaller;
3496  }
3497  }
3498 
3499  // Replace the trampoline call with a direct call. Since there is no 'nest'
3500  // parameter, there is no need to adjust the argument list. Let the generic
3501  // code sort out any function type mismatches.
3502  Constant *NewCallee =
3503  NestF->getType() == PTy ? NestF :
3504  ConstantExpr::getBitCast(NestF, PTy);
3505  CS.setCalledFunction(NewCallee);
3506  return CS.getInstruction();
3507 }