1 //===- InstCombineCalls.cpp -----------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visitCall and visitInvoke functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/Twine.h"
28 #include "llvm/IR/Attributes.h"
29 #include "llvm/IR/BasicBlock.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/Constant.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/GlobalVariable.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/LLVMContext.h"
43 #include "llvm/IR/Metadata.h"
44 #include "llvm/IR/PatternMatch.h"
45 #include "llvm/IR/Statepoint.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/IR/User.h"
48 #include "llvm/IR/Value.h"
49 #include "llvm/IR/ValueHandle.h"
51 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/Compiler.h"
54 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/KnownBits.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstring>
66 #include <utility>
67 #include <vector>
68 
69 using namespace llvm;
70 using namespace PatternMatch;
71 
72 #define DEBUG_TYPE "instcombine"
73 
74 STATISTIC(NumSimplified, "Number of library calls simplified");
75 
static cl::opt<unsigned> UnfoldElementAtomicMemcpyMaxElements(
    "unfold-element-atomic-memcpy-max-elements",
    cl::init(16),
    cl::desc("Maximum number of elements in atomic memcpy the optimizer is "
             "allowed to unfold"));
81 
/// Return the specified type promoted as it would be to pass through a
/// va_arg area.
84 static Type *getPromotedType(Type *Ty) {
85  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
86  if (ITy->getBitWidth() < 32)
87  return Type::getInt32Ty(Ty->getContext());
88  }
89  return Ty;
90 }
91 
92 /// Return a constant boolean vector that has true elements in all positions
93 /// where the input constant data vector has an element with the sign bit set.
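// Illustrative example (not from the original source): the constant vector
// <4 x i32> <i32 -1, i32 3, i32 -7, i32 0> maps to
// <4 x i1> <i1 true, i1 false, i1 true, i1 false>, since only the elements
// with the sign bit set become true.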
static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
  SmallVector<Constant *, 32> BoolVec;
  IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
97  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
98  Constant *Elt = V->getElementAsConstant(I);
99  assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
100  "Unexpected constant data vector element type");
101  bool Sign = V->getElementType()->isIntegerTy()
102  ? cast<ConstantInt>(Elt)->isNegative()
103  : cast<ConstantFP>(Elt)->isNegative();
104  BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
105  }
106  return ConstantVector::get(BoolVec);
107 }
108 
109 Instruction *
110 InstCombiner::SimplifyElementUnorderedAtomicMemCpy(AtomicMemCpyInst *AMI) {
111  // Try to unfold this intrinsic into sequence of explicit atomic loads and
112  // stores.
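  // Illustrative example (not in the original comments): an element-wise
  // atomic memcpy with length 8 and element size 4 unfolds into two
  // unordered-atomic i32 load/store pairs, one per element.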
113  // First check that number of elements is compile time constant.
114  auto *LengthCI = dyn_cast<ConstantInt>(AMI->getLength());
115  if (!LengthCI)
116  return nullptr;
117 
118  // Check that there are not too many elements.
119  uint64_t LengthInBytes = LengthCI->getZExtValue();
120  uint32_t ElementSizeInBytes = AMI->getElementSizeInBytes();
121  uint64_t NumElements = LengthInBytes / ElementSizeInBytes;
122  if (NumElements >= UnfoldElementAtomicMemcpyMaxElements)
123  return nullptr;
124 
125  // Only expand if there are elements to copy.
126  if (NumElements > 0) {
127  // Don't unfold into illegal integers
128  uint64_t ElementSizeInBits = ElementSizeInBytes * 8;
129  if (!getDataLayout().isLegalInteger(ElementSizeInBits))
130  return nullptr;
131 
    // Cast source and destination to the correct type. Intrinsic input
    // arguments are usually represented as i8*. Often operands will be
    // explicitly cast to i8*, and we could simply strip those casts instead
    // of inserting new ones. However, it's easier to rely on other
    // InstCombine rules, which cover the trivial cases anyway.
137  Value *Src = AMI->getRawSource();
138  Value *Dst = AMI->getRawDest();
139  Type *ElementPointerType =
140  Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits,
141  Src->getType()->getPointerAddressSpace());
142 
143  Value *SrcCasted = Builder.CreatePointerCast(Src, ElementPointerType,
144  "memcpy_unfold.src_casted");
145  Value *DstCasted = Builder.CreatePointerCast(Dst, ElementPointerType,
146  "memcpy_unfold.dst_casted");
147 
148  for (uint64_t i = 0; i < NumElements; ++i) {
149  // Get current element addresses
150  ConstantInt *ElementIdxCI =
151  ConstantInt::get(AMI->getContext(), APInt(64, i));
152  Value *SrcElementAddr =
153  Builder.CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
154  Value *DstElementAddr =
155  Builder.CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
156 
157  // Load from the source. Transfer alignment information and mark load as
158  // unordered atomic.
      LoadInst *Load = Builder.CreateLoad(SrcElementAddr, "memcpy_unfold.val");
      Load->setAtomic(AtomicOrdering::Unordered);
      // We know the alignment of the first element. The verifier also
      // guarantees that the element size is less than or equal to the first
      // element's alignment and that both values are powers of two. This
      // means all subsequent accesses are at least element size aligned.
165  // TODO: We can infer better alignment but there is no evidence that this
166  // will matter.
167  Load->setAlignment(i == 0 ? AMI->getParamAlignment(1)
168  : ElementSizeInBytes);
169  Load->setDebugLoc(AMI->getDebugLoc());
170 
171  // Store loaded value via unordered atomic store.
      StoreInst *Store = Builder.CreateStore(Load, DstElementAddr);
      Store->setAtomic(AtomicOrdering::Unordered);
174  Store->setAlignment(i == 0 ? AMI->getParamAlignment(0)
175  : ElementSizeInBytes);
176  Store->setDebugLoc(AMI->getDebugLoc());
177  }
178  }
179 
180  // Set the number of elements of the copy to 0, it will be deleted on the
181  // next iteration.
182  AMI->setLength(Constant::getNullValue(LengthCI->getType()));
183  return AMI;
184 }
185 
186 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
187  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT);
188  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT);
189  unsigned MinAlign = std::min(DstAlign, SrcAlign);
190  unsigned CopyAlign = MI->getAlignment();
191 
192  if (CopyAlign < MinAlign) {
193  MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false));
194  return MI;
195  }
196 
197  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
198  // load/store.
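  // Illustrative example (not in the original comments): a memcpy of 8 bytes
  // with known alignment becomes a single i64 load from the source followed
  // by an i64 store to the destination.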
199  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
200  if (!MemOpLength) return nullptr;
201 
  // Source and destination pointer types are always "i8*" for the intrinsic.
  // See if the size is something we can handle with a single primitive
  // load/store. A single load+store correctly handles overlapping memory in
  // the memmove case.
206  uint64_t Size = MemOpLength->getLimitedValue();
207  assert(Size && "0-sized memory transferring should be removed already.");
208 
209  if (Size > 8 || (Size&(Size-1)))
210  return nullptr; // If not 1/2/4/8 bytes, exit.
211 
212  // Use an integer load+store unless we can find something better.
213  unsigned SrcAddrSp =
214  cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
215  unsigned DstAddrSp =
216  cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
217 
218  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
219  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
220  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
221 
222  // If the memcpy has metadata describing the members, see if we can get the
223  // TBAA tag describing our copy.
224  MDNode *CopyMD = nullptr;
225  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
226  if (M->getNumOperands() == 3 && M->getOperand(0) &&
227  mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
228  mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
229  M->getOperand(1) &&
230  mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
231  mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
232  Size &&
233  M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
234  CopyMD = cast<MDNode>(M->getOperand(2));
235  }
236 
237  // If the memcpy/memmove provides better alignment info than we can
238  // infer, use it.
239  SrcAlign = std::max(SrcAlign, CopyAlign);
240  DstAlign = std::max(DstAlign, CopyAlign);
241 
242  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
243  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
244  LoadInst *L = Builder.CreateLoad(Src, MI->isVolatile());
245  L->setAlignment(SrcAlign);
246  if (CopyMD)
247  L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  MDNode *LoopMemParallelMD =
    MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  if (LoopMemParallelMD)
    L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);

253  StoreInst *S = Builder.CreateStore(L, Dest, MI->isVolatile());
254  S->setAlignment(DstAlign);
255  if (CopyMD)
256  S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  if (LoopMemParallelMD)
    S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);

260  // Set the size of the copy to 0, it will be deleted on the next iteration.
261  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
262  return MI;
263 }
264 
265 Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
266  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
267  if (MI->getAlignment() < Alignment) {
268  MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
269  Alignment, false));
270  return MI;
271  }
272 
273  // Extract the length and alignment and fill if they are constant.
274  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
275  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
276  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
277  return nullptr;
278  uint64_t Len = LenC->getLimitedValue();
279  Alignment = MI->getAlignment();
280  assert(Len && "0-sized memory setting should be removed already.");
281 
282  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
283  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
284  Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
285 
286  Value *Dest = MI->getDest();
287  unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
288  Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
289  Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
290 
291  // Alignment 0 is identity for alignment 1 for memset, but not store.
292  if (Alignment == 0) Alignment = 1;
293 
294  // Extract the fill value and store.
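    // Illustrative example (not in the original comments): a fill byte of
    // 0xAB with Len == 4 stores the i32 constant 0xABABABAB; multiplying by
    // 0x0101010101010101 replicates the byte across all eight byte lanes and
    // the truncation to ITy keeps the low Len bytes.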
295  uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
296  StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
297  MI->isVolatile());
298  S->setAlignment(Alignment);
299 
300  // Set the size of the copy to 0, it will be deleted on the next iteration.
301  MI->setLength(Constant::getNullValue(LenC->getType()));
302  return MI;
303  }
304 
305  return nullptr;
306 }
307 
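// Attempt to simplify SSE2/AVX2/AVX512 packed shift intrinsics to generic IR
// shifts when the shift amount (an immediate, or the low 64 bits of a vector
// operand) is constant. Out-of-range amounts are folded the way the
// instructions behave: logical shifts produce zero, arithmetic shifts clamp
// to BitWidth - 1.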
static Value *simplifyX86immShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
310  bool LogicalShift = false;
311  bool ShiftLeft = false;
312 
313  switch (II.getIntrinsicID()) {
314  default: llvm_unreachable("Unexpected intrinsic!");
315  case Intrinsic::x86_sse2_psra_d:
316  case Intrinsic::x86_sse2_psra_w:
317  case Intrinsic::x86_sse2_psrai_d:
318  case Intrinsic::x86_sse2_psrai_w:
319  case Intrinsic::x86_avx2_psra_d:
320  case Intrinsic::x86_avx2_psra_w:
321  case Intrinsic::x86_avx2_psrai_d:
322  case Intrinsic::x86_avx2_psrai_w:
323  case Intrinsic::x86_avx512_psra_q_128:
324  case Intrinsic::x86_avx512_psrai_q_128:
325  case Intrinsic::x86_avx512_psra_q_256:
326  case Intrinsic::x86_avx512_psrai_q_256:
327  case Intrinsic::x86_avx512_psra_d_512:
328  case Intrinsic::x86_avx512_psra_q_512:
329  case Intrinsic::x86_avx512_psra_w_512:
330  case Intrinsic::x86_avx512_psrai_d_512:
331  case Intrinsic::x86_avx512_psrai_q_512:
332  case Intrinsic::x86_avx512_psrai_w_512:
333  LogicalShift = false; ShiftLeft = false;
334  break;
335  case Intrinsic::x86_sse2_psrl_d:
336  case Intrinsic::x86_sse2_psrl_q:
337  case Intrinsic::x86_sse2_psrl_w:
338  case Intrinsic::x86_sse2_psrli_d:
339  case Intrinsic::x86_sse2_psrli_q:
340  case Intrinsic::x86_sse2_psrli_w:
341  case Intrinsic::x86_avx2_psrl_d:
342  case Intrinsic::x86_avx2_psrl_q:
343  case Intrinsic::x86_avx2_psrl_w:
344  case Intrinsic::x86_avx2_psrli_d:
345  case Intrinsic::x86_avx2_psrli_q:
346  case Intrinsic::x86_avx2_psrli_w:
347  case Intrinsic::x86_avx512_psrl_d_512:
348  case Intrinsic::x86_avx512_psrl_q_512:
349  case Intrinsic::x86_avx512_psrl_w_512:
350  case Intrinsic::x86_avx512_psrli_d_512:
351  case Intrinsic::x86_avx512_psrli_q_512:
352  case Intrinsic::x86_avx512_psrli_w_512:
353  LogicalShift = true; ShiftLeft = false;
354  break;
355  case Intrinsic::x86_sse2_psll_d:
356  case Intrinsic::x86_sse2_psll_q:
357  case Intrinsic::x86_sse2_psll_w:
358  case Intrinsic::x86_sse2_pslli_d:
359  case Intrinsic::x86_sse2_pslli_q:
360  case Intrinsic::x86_sse2_pslli_w:
361  case Intrinsic::x86_avx2_psll_d:
362  case Intrinsic::x86_avx2_psll_q:
363  case Intrinsic::x86_avx2_psll_w:
364  case Intrinsic::x86_avx2_pslli_d:
365  case Intrinsic::x86_avx2_pslli_q:
366  case Intrinsic::x86_avx2_pslli_w:
367  case Intrinsic::x86_avx512_psll_d_512:
368  case Intrinsic::x86_avx512_psll_q_512:
369  case Intrinsic::x86_avx512_psll_w_512:
370  case Intrinsic::x86_avx512_pslli_d_512:
371  case Intrinsic::x86_avx512_pslli_q_512:
372  case Intrinsic::x86_avx512_pslli_w_512:
373  LogicalShift = true; ShiftLeft = true;
374  break;
375  }
376  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
377 
378  // Simplify if count is constant.
379  auto Arg1 = II.getArgOperand(1);
380  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
381  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
382  auto CInt = dyn_cast<ConstantInt>(Arg1);
383  if (!CAZ && !CDV && !CInt)
384  return nullptr;
385 
386  APInt Count(64, 0);
387  if (CDV) {
    // SSE2/AVX2 use only the first 64 bits of the 128-bit vector
    // operand to compute the shift amount.
390  auto VT = cast<VectorType>(CDV->getType());
391  unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
392  assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
393  unsigned NumSubElts = 64 / BitWidth;
394 
395  // Concatenate the sub-elements to create the 64-bit value.
396  for (unsigned i = 0; i != NumSubElts; ++i) {
397  unsigned SubEltIdx = (NumSubElts - 1) - i;
398  auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
399  Count <<= BitWidth;
400  Count |= SubElt->getValue().zextOrTrunc(64);
401  }
402  }
403  else if (CInt)
404  Count = CInt->getValue();
405 
406  auto Vec = II.getArgOperand(0);
407  auto VT = cast<VectorType>(Vec->getType());
408  auto SVT = VT->getElementType();
409  unsigned VWidth = VT->getNumElements();
410  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
411 
412  // If shift-by-zero then just return the original value.
413  if (Count.isNullValue())
414  return Vec;
415 
416  // Handle cases when Shift >= BitWidth.
417  if (Count.uge(BitWidth)) {
418  // If LogicalShift - just return zero.
419  if (LogicalShift)
420  return ConstantAggregateZero::get(VT);
421 
422  // If ArithmeticShift - clamp Shift to (BitWidth - 1).
423  Count = APInt(64, BitWidth - 1);
424  }
425 
426  // Get a constant vector of the same type as the first operand.
427  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
428  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
429 
430  if (ShiftLeft)
431  return Builder.CreateShl(Vec, ShiftVec);
432 
433  if (LogicalShift)
434  return Builder.CreateLShr(Vec, ShiftVec);
435 
436  return Builder.CreateAShr(Vec, ShiftVec);
437 }
438 
439 // Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
440 // Unlike the generic IR shifts, the intrinsics have defined behaviour for out
441 // of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
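// Illustrative example (not from the original comments): for avx2.psrav.d, a
// constant per-lane shift amount of 40 is out of range, but the intrinsic
// still splats the sign bit; the code below models that as a shift by 31,
// whereas a generic IR 'ashr' by 40 would yield poison.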
static Value *simplifyX86varShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
444  bool LogicalShift = false;
445  bool ShiftLeft = false;
446 
447  switch (II.getIntrinsicID()) {
448  default: llvm_unreachable("Unexpected intrinsic!");
449  case Intrinsic::x86_avx2_psrav_d:
450  case Intrinsic::x86_avx2_psrav_d_256:
451  case Intrinsic::x86_avx512_psrav_q_128:
452  case Intrinsic::x86_avx512_psrav_q_256:
453  case Intrinsic::x86_avx512_psrav_d_512:
454  case Intrinsic::x86_avx512_psrav_q_512:
455  case Intrinsic::x86_avx512_psrav_w_128:
456  case Intrinsic::x86_avx512_psrav_w_256:
457  case Intrinsic::x86_avx512_psrav_w_512:
458  LogicalShift = false;
459  ShiftLeft = false;
460  break;
461  case Intrinsic::x86_avx2_psrlv_d:
462  case Intrinsic::x86_avx2_psrlv_d_256:
463  case Intrinsic::x86_avx2_psrlv_q:
464  case Intrinsic::x86_avx2_psrlv_q_256:
465  case Intrinsic::x86_avx512_psrlv_d_512:
466  case Intrinsic::x86_avx512_psrlv_q_512:
467  case Intrinsic::x86_avx512_psrlv_w_128:
468  case Intrinsic::x86_avx512_psrlv_w_256:
469  case Intrinsic::x86_avx512_psrlv_w_512:
470  LogicalShift = true;
471  ShiftLeft = false;
472  break;
473  case Intrinsic::x86_avx2_psllv_d:
474  case Intrinsic::x86_avx2_psllv_d_256:
475  case Intrinsic::x86_avx2_psllv_q:
476  case Intrinsic::x86_avx2_psllv_q_256:
477  case Intrinsic::x86_avx512_psllv_d_512:
478  case Intrinsic::x86_avx512_psllv_q_512:
479  case Intrinsic::x86_avx512_psllv_w_128:
480  case Intrinsic::x86_avx512_psllv_w_256:
481  case Intrinsic::x86_avx512_psllv_w_512:
482  LogicalShift = true;
483  ShiftLeft = true;
484  break;
485  }
486  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
487 
488  // Simplify if all shift amounts are constant/undef.
489  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
490  if (!CShift)
491  return nullptr;
492 
493  auto Vec = II.getArgOperand(0);
494  auto VT = cast<VectorType>(II.getType());
495  auto SVT = VT->getVectorElementType();
496  int NumElts = VT->getNumElements();
497  int BitWidth = SVT->getIntegerBitWidth();
498 
499  // Collect each element's shift amount.
500  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
501  bool AnyOutOfRange = false;
502  SmallVector<int, 8> ShiftAmts;
503  for (int I = 0; I < NumElts; ++I) {
504  auto *CElt = CShift->getAggregateElement(I);
505  if (CElt && isa<UndefValue>(CElt)) {
506  ShiftAmts.push_back(-1);
507  continue;
508  }
509 
510  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
511  if (!COp)
512  return nullptr;
513 
514  // Handle out of range shifts.
515  // If LogicalShift - set to BitWidth (special case).
516  // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
517  APInt ShiftVal = COp->getValue();
518  if (ShiftVal.uge(BitWidth)) {
519  AnyOutOfRange = LogicalShift;
520  ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
521  continue;
522  }
523 
524  ShiftAmts.push_back((int)ShiftVal.getZExtValue());
525  }
526 
527  // If all elements out of range or UNDEF, return vector of zeros/undefs.
528  // ArithmeticShift should only hit this if they are all UNDEF.
529  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
530  if (llvm::all_of(ShiftAmts, OutOfRange)) {
531  SmallVector<Constant *, 8> ConstantVec;
532  for (int Idx : ShiftAmts) {
533  if (Idx < 0) {
534  ConstantVec.push_back(UndefValue::get(SVT));
535  } else {
536  assert(LogicalShift && "Logical shift expected");
537  ConstantVec.push_back(ConstantInt::getNullValue(SVT));
538  }
539  }
540  return ConstantVector::get(ConstantVec);
541  }
542 
543  // We can't handle only some out of range values with generic logical shifts.
544  if (AnyOutOfRange)
545  return nullptr;
546 
547  // Build the shift amount constant vector.
548  SmallVector<Constant *, 8> ShiftVecAmts;
549  for (int Idx : ShiftAmts) {
550  if (Idx < 0)
551  ShiftVecAmts.push_back(UndefValue::get(SVT));
552  else
553  ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
554  }
555  auto ShiftVec = ConstantVector::get(ShiftVecAmts);
556 
557  if (ShiftLeft)
558  return Builder.CreateShl(Vec, ShiftVec);
559 
560  if (LogicalShift)
561  return Builder.CreateLShr(Vec, ShiftVec);
562 
563  return Builder.CreateAShr(Vec, ShiftVec);
564 }
565 
static Value *simplifyX86muldq(const IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder) {
568  Value *Arg0 = II.getArgOperand(0);
569  Value *Arg1 = II.getArgOperand(1);
570  Type *ResTy = II.getType();
571  assert(Arg0->getType()->getScalarSizeInBits() == 32 &&
572  Arg1->getType()->getScalarSizeInBits() == 32 &&
573  ResTy->getScalarSizeInBits() == 64 && "Unexpected muldq/muludq types");
574 
575  // muldq/muludq(undef, undef) -> zero (matches generic mul behavior)
576  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
577  return ConstantAggregateZero::get(ResTy);
578 
579  // Constant folding.
580  // PMULDQ = (mul(vXi64 sext(shuffle<0,2,..>(Arg0)),
581  // vXi64 sext(shuffle<0,2,..>(Arg1))))
582  // PMULUDQ = (mul(vXi64 zext(shuffle<0,2,..>(Arg0)),
583  // vXi64 zext(shuffle<0,2,..>(Arg1))))
584  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
585  return nullptr;
586 
587  unsigned NumElts = ResTy->getVectorNumElements();
588  assert(Arg0->getType()->getVectorNumElements() == (2 * NumElts) &&
589  Arg1->getType()->getVectorNumElements() == (2 * NumElts) &&
590  "Unexpected muldq/muludq types");
591 
592  unsigned IntrinsicID = II.getIntrinsicID();
593  bool IsSigned = (Intrinsic::x86_sse41_pmuldq == IntrinsicID ||
594  Intrinsic::x86_avx2_pmul_dq == IntrinsicID ||
595  Intrinsic::x86_avx512_pmul_dq_512 == IntrinsicID);
596 
597  SmallVector<unsigned, 16> ShuffleMask;
598  for (unsigned i = 0; i != NumElts; ++i)
599  ShuffleMask.push_back(i * 2);
600 
601  auto *LHS = Builder.CreateShuffleVector(Arg0, Arg0, ShuffleMask);
602  auto *RHS = Builder.CreateShuffleVector(Arg1, Arg1, ShuffleMask);
603 
604  if (IsSigned) {
605  LHS = Builder.CreateSExt(LHS, ResTy);
606  RHS = Builder.CreateSExt(RHS, ResTy);
607  } else {
608  LHS = Builder.CreateZExt(LHS, ResTy);
609  RHS = Builder.CreateZExt(RHS, ResTy);
610  }
611 
612  return Builder.CreateMul(LHS, RHS);
613 }
614 
615 static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
616  Value *Arg0 = II.getArgOperand(0);
617  Value *Arg1 = II.getArgOperand(1);
618  Type *ResTy = II.getType();
619 
620  // Fast all undef handling.
621  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
622  return UndefValue::get(ResTy);
623 
624  Type *ArgTy = Arg0->getType();
625  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
626  unsigned NumDstElts = ResTy->getVectorNumElements();
627  unsigned NumSrcElts = ArgTy->getVectorNumElements();
628  assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
629 
630  unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
631  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
632  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
633  assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
634  "Unexpected packing types");
635 
636  // Constant folding.
637  auto *Cst0 = dyn_cast<Constant>(Arg0);
638  auto *Cst1 = dyn_cast<Constant>(Arg1);
639  if (!Cst0 || !Cst1)
640  return nullptr;
641 
  SmallVector<Constant *, 32> Vals;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
644  for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
645  unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
646  auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
647  auto *COp = Cst->getAggregateElement(SrcIdx);
648  if (COp && isa<UndefValue>(COp)) {
649  Vals.push_back(UndefValue::get(ResTy->getScalarType()));
650  continue;
651  }
652 
653  auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
654  if (!CInt)
655  return nullptr;
656 
657  APInt Val = CInt->getValue();
658  assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
659  "Unexpected constant bitwidth");
660 
661  if (IsSigned) {
662  // PACKSS: Truncate signed value with signed saturation.
663  // Source values less than dst minint are saturated to minint.
664  // Source values greater than dst maxint are saturated to maxint.
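        // Illustrative example (not in the original comments): packssdw
        // saturates an i32 source value of 100000 to the i16 value 32767,
        // and -100000 to -32768.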
665  if (Val.isSignedIntN(DstScalarSizeInBits))
666  Val = Val.trunc(DstScalarSizeInBits);
667  else if (Val.isNegative())
668  Val = APInt::getSignedMinValue(DstScalarSizeInBits);
669  else
670  Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
671  } else {
672  // PACKUS: Truncate signed value with unsigned saturation.
673  // Source values less than zero are saturated to zero.
674  // Source values greater than dst maxuint are saturated to maxuint.
675  if (Val.isIntN(DstScalarSizeInBits))
676  Val = Val.trunc(DstScalarSizeInBits);
677  else if (Val.isNegative())
678  Val = APInt::getNullValue(DstScalarSizeInBits);
679  else
680  Val = APInt::getAllOnesValue(DstScalarSizeInBits);
681  }
682 
683  Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
684  }
685  }
686 
687  return ConstantVector::get(Vals);
688 }
689 
static Value *simplifyX86movmsk(const IntrinsicInst &II) {
  Value *Arg = II.getArgOperand(0);
692  Type *ResTy = II.getType();
693  Type *ArgTy = Arg->getType();
694 
695  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
696  if (isa<UndefValue>(Arg))
697  return Constant::getNullValue(ResTy);
698 
699  // We can't easily peek through x86_mmx types.
700  if (!ArgTy->isVectorTy())
701  return nullptr;
702 
703  auto *C = dyn_cast<Constant>(Arg);
704  if (!C)
705  return nullptr;
706 
707  // Extract signbits of the vector input and pack into integer result.
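  // Illustrative example (not in the original comments): movmskps on the
  // constant <4 x float> <-1.0, 2.0, -3.0, 4.0> produces 0b0101 (decimal 5),
  // with bit I set exactly when element I is negative.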
708  APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
709  for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
710  auto *COp = C->getAggregateElement(I);
711  if (!COp)
712  return nullptr;
713  if (isa<UndefValue>(COp))
714  continue;
715 
716  auto *CInt = dyn_cast<ConstantInt>(COp);
717  auto *CFp = dyn_cast<ConstantFP>(COp);
718  if (!CInt && !CFp)
719  return nullptr;
720 
721  if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
722  Result.setBit(I);
723  }
724 
725  return Constant::getIntegerValue(ResTy, Result);
726 }
727 
static Value *simplifyX86insertps(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
730  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
731  if (!CInt)
732  return nullptr;
733 
734  VectorType *VecTy = cast<VectorType>(II.getType());
735  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
736 
737  // The immediate permute control byte looks like this:
738  // [3:0] - zero mask for each 32-bit lane
739  // [5:4] - select one 32-bit destination lane
740  // [7:6] - select one 32-bit source lane
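  // Illustrative example (not in the original comments): an immediate of 0x10
  // (ZMask = 0, DestLane = 1, SourceLane = 0) becomes the shuffle mask
  // <0, 4, 2, 3>: lane 1 of the first operand is replaced by lane 0 of the
  // second operand.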
741 
742  uint8_t Imm = CInt->getZExtValue();
743  uint8_t ZMask = Imm & 0xf;
744  uint8_t DestLane = (Imm >> 4) & 0x3;
745  uint8_t SourceLane = (Imm >> 6) & 0x3;
746 
  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);

749  // If all zero mask bits are set, this was just a weird way to
750  // generate a zero vector.
751  if (ZMask == 0xf)
752  return ZeroVector;
753 
754  // Initialize by passing all of the first source bits through.
755  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };
756 
757  // We may replace the second operand with the zero vector.
758  Value *V1 = II.getArgOperand(1);
759 
760  if (ZMask) {
761  // If the zero mask is being used with a single input or the zero mask
762  // overrides the destination lane, this is a shuffle with the zero vector.
763  if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
764  (ZMask & (1 << DestLane))) {
765  V1 = ZeroVector;
766  // We may still move 32-bits of the first source vector from one lane
767  // to another.
768  ShuffleMask[DestLane] = SourceLane;
769  // The zero mask may override the previous insert operation.
770  for (unsigned i = 0; i < 4; ++i)
771  if ((ZMask >> i) & 0x1)
772  ShuffleMask[i] = i + 4;
773  } else {
774  // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
775  return nullptr;
776  }
777  } else {
778  // Replace the selected destination lane with the selected source lane.
779  ShuffleMask[DestLane] = SourceLane + 4;
780  }
781 
782  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
783 }
784 
785 /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
786 /// or conversion to a shuffle vector.
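/// Illustrative example (not from the original comments), based on the
/// constant-fold path below: with Length = 16 and Index = 8, bits [23:8] of
/// the source's low 64 bits end up in bits [15:0] of the result, the
/// remaining low 64 bits are zero and the upper 64 bits are undef.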
static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
                               ConstantInt *CILength, ConstantInt *CIIndex,
                               InstCombiner::BuilderTy &Builder) {
790  auto LowConstantHighUndef = [&](uint64_t Val) {
791  Type *IntTy64 = Type::getInt64Ty(II.getContext());
792  Constant *Args[] = {ConstantInt::get(IntTy64, Val),
793  UndefValue::get(IntTy64)};
794  return ConstantVector::get(Args);
795  };
796 
797  // See if we're dealing with constant values.
798  Constant *C0 = dyn_cast<Constant>(Op0);
799  ConstantInt *CI0 =
800  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
801  : nullptr;
802 
803  // Attempt to constant fold.
804  if (CILength && CIIndex) {
805  // From AMD documentation: "The bit index and field length are each six
806  // bits in length other bits of the field are ignored."
807  APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
808  APInt APLength = CILength->getValue().zextOrTrunc(6);
809 
810  unsigned Index = APIndex.getZExtValue();
811 
812  // From AMD documentation: "a value of zero in the field length is
813  // defined as length of 64".
814  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
815 
816  // From AMD documentation: "If the sum of the bit index + length field
817  // is greater than 64, the results are undefined".
818  unsigned End = Index + Length;
819 
820  // Note that both field index and field length are 8-bit quantities.
821  // Since variables 'Index' and 'Length' are unsigned values
822  // obtained from zero-extending field index and field length
823  // respectively, their sum should never wrap around.
824  if (End > 64)
825  return UndefValue::get(II.getType());
826 
    // If we are extracting whole bytes, we can convert this to a shuffle.
    // Lowering can recognize EXTRQI shuffle masks.
829  if ((Length % 8) == 0 && (Index % 8) == 0) {
830  // Convert bit indices to byte indices.
831  Length /= 8;
832  Index /= 8;
833 
834  Type *IntTy8 = Type::getInt8Ty(II.getContext());
835  Type *IntTy32 = Type::getInt32Ty(II.getContext());
836  VectorType *ShufTy = VectorType::get(IntTy8, 16);
837 
838  SmallVector<Constant *, 16> ShuffleMask;
839  for (int i = 0; i != (int)Length; ++i)
840  ShuffleMask.push_back(
841  Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
842  for (int i = Length; i != 8; ++i)
843  ShuffleMask.push_back(
844  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
845  for (int i = 8; i != 16; ++i)
846  ShuffleMask.push_back(UndefValue::get(IntTy32));
847 
848  Value *SV = Builder.CreateShuffleVector(
849  Builder.CreateBitCast(Op0, ShufTy),
850  ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
851  return Builder.CreateBitCast(SV, II.getType());
852  }
853 
854  // Constant Fold - shift Index'th bit to lowest position and mask off
855  // Length bits.
856  if (CI0) {
857  APInt Elt = CI0->getValue();
858  Elt.lshrInPlace(Index);
859  Elt = Elt.zextOrTrunc(Length);
860  return LowConstantHighUndef(Elt.getZExtValue());
861  }
862 
863  // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
864  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
865  Value *Args[] = {Op0, CILength, CIIndex};
866  Module *M = II.getModule();
867  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
868  return Builder.CreateCall(F, Args);
869  }
870  }
871 
872  // Constant Fold - extraction from zero is always {zero, undef}.
873  if (CI0 && CI0->isZero())
874  return LowConstantHighUndef(0);
875 
876  return nullptr;
877 }
878 
879 /// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
880 /// folding or conversion to a shuffle vector.
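/// Illustrative example (not from the original comments), based on the
/// constant-fold path below: with Length = 8 and Index = 16, bits [23:16] of
/// the first operand's low 64 bits are replaced by bits [7:0] of the second
/// operand's low 64 bits; the upper 64 bits of the result are undef.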
static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
                                 APInt APLength, APInt APIndex,
                                 InstCombiner::BuilderTy &Builder) {
884  // From AMD documentation: "The bit index and field length are each six bits
885  // in length other bits of the field are ignored."
886  APIndex = APIndex.zextOrTrunc(6);
887  APLength = APLength.zextOrTrunc(6);
888 
889  // Attempt to constant fold.
890  unsigned Index = APIndex.getZExtValue();
891 
892  // From AMD documentation: "a value of zero in the field length is
893  // defined as length of 64".
894  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
895 
896  // From AMD documentation: "If the sum of the bit index + length field
897  // is greater than 64, the results are undefined".
898  unsigned End = Index + Length;
899 
900  // Note that both field index and field length are 8-bit quantities.
901  // Since variables 'Index' and 'Length' are unsigned values
902  // obtained from zero-extending field index and field length
903  // respectively, their sum should never wrap around.
904  if (End > 64)
905  return UndefValue::get(II.getType());
906 
907  // If we are inserting whole bytes, we can convert this to a shuffle.
908  // Lowering can recognize INSERTQI shuffle masks.
909  if ((Length % 8) == 0 && (Index % 8) == 0) {
910  // Convert bit indices to byte indices.
911  Length /= 8;
912  Index /= 8;
913 
914  Type *IntTy8 = Type::getInt8Ty(II.getContext());
915  Type *IntTy32 = Type::getInt32Ty(II.getContext());
916  VectorType *ShufTy = VectorType::get(IntTy8, 16);
917 
918  SmallVector<Constant *, 16> ShuffleMask;
919  for (int i = 0; i != (int)Index; ++i)
920  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
921  for (int i = 0; i != (int)Length; ++i)
922  ShuffleMask.push_back(
923  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
924  for (int i = Index + Length; i != 8; ++i)
925  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
926  for (int i = 8; i != 16; ++i)
927  ShuffleMask.push_back(UndefValue::get(IntTy32));
928 
929  Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
930  Builder.CreateBitCast(Op1, ShufTy),
931  ConstantVector::get(ShuffleMask));
932  return Builder.CreateBitCast(SV, II.getType());
933  }
934 
935  // See if we're dealing with constant values.
936  Constant *C0 = dyn_cast<Constant>(Op0);
937  Constant *C1 = dyn_cast<Constant>(Op1);
938  ConstantInt *CI00 =
939  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
940  : nullptr;
941  ConstantInt *CI10 =
942  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
943  : nullptr;
944 
945  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
946  if (CI00 && CI10) {
947  APInt V00 = CI00->getValue();
948  APInt V10 = CI10->getValue();
949  APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
950  V00 = V00 & ~Mask;
951  V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
952  APInt Val = V00 | V10;
953  Type *IntTy64 = Type::getInt64Ty(II.getContext());
954  Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
955  UndefValue::get(IntTy64)};
956  return ConstantVector::get(Args);
957  }
958 
959  // If we were an INSERTQ call, we'll save demanded elements if we convert to
960  // INSERTQI.
961  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
962  Type *IntTy8 = Type::getInt8Ty(II.getContext());
963  Constant *CILength = ConstantInt::get(IntTy8, Length, false);
964  Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
965 
966  Value *Args[] = {Op0, Op1, CILength, CIIndex};
967  Module *M = II.getModule();
968  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
969  return Builder.CreateCall(F, Args);
970  }
971 
972  return nullptr;
973 }
974 
975 /// Attempt to convert pshufb* to shufflevector if the mask is constant.
static Value *simplifyX86pshufb(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
980  return nullptr;
981 
982  auto *VecTy = cast<VectorType>(II.getType());
983  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
984  unsigned NumElts = VecTy->getNumElements();
985  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
986  "Unexpected number of elements in shuffle mask!");
987 
988  // Construct a shuffle mask from constant integers or UNDEFs.
989  Constant *Indexes[64] = {nullptr};
990 
991  // Each byte in the shuffle control mask forms an index to permute the
992  // corresponding byte in the destination operand.
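  // Illustrative example (not in the original comments): a control byte of
  // 0x83 has bit 7 set, so the corresponding result byte is zero; a control
  // byte of 0x03 selects byte 3 of the same 128-bit lane of the first
  // operand.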
993  for (unsigned I = 0; I < NumElts; ++I) {
994  Constant *COp = V->getAggregateElement(I);
995  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
996  return nullptr;
997 
998  if (isa<UndefValue>(COp)) {
999  Indexes[I] = UndefValue::get(MaskEltTy);
1000  continue;
1001  }
1002 
1003  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
1004 
1005  // If the most significant bit (bit[7]) of each byte of the shuffle
1006  // control mask is set, then zero is written in the result byte.
1007  // The zero vector is in the right-hand side of the resulting
1008  // shufflevector.
1009 
1010  // The value of each index for the high 128-bit lane is the least
1011  // significant 4 bits of the respective shuffle control byte.
1012  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
1013  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1014  }
1015 
1016  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1017  auto V1 = II.getArgOperand(0);
1018  auto V2 = Constant::getNullValue(VecTy);
1019  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1020 }
1021 
1022 /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
                                    InstCombiner::BuilderTy &Builder) {
1025  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
1026  if (!V)
1027  return nullptr;
1028 
1029  auto *VecTy = cast<VectorType>(II.getType());
1030  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1031  unsigned NumElts = VecTy->getVectorNumElements();
1032  bool IsPD = VecTy->getScalarType()->isDoubleTy();
1033  unsigned NumLaneElts = IsPD ? 2 : 4;
1034  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
1035 
1036  // Construct a shuffle mask from constant integers or UNDEFs.
1037  Constant *Indexes[16] = {nullptr};
1038 
1039  // The intrinsics only read one or two bits, clear the rest.
1040  for (unsigned I = 0; I < NumElts; ++I) {
1041  Constant *COp = V->getAggregateElement(I);
1042  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1043  return nullptr;
1044 
1045  if (isa<UndefValue>(COp)) {
1046  Indexes[I] = UndefValue::get(MaskEltTy);
1047  continue;
1048  }
1049 
1050  APInt Index = cast<ConstantInt>(COp)->getValue();
1051  Index = Index.zextOrTrunc(32).getLoBits(2);
1052 
    // The PD variants use bit 1 to select the per-lane element index, so
    // shift down to convert to a generic shuffle mask index.
1055  if (IsPD)
1056  Index.lshrInPlace(1);
1057 
    // The _256 variants are a bit trickier since the mask bits always index
    // into the corresponding 128-bit half. In order to convert to a generic
    // shuffle, we have to make that explicit.
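    // Illustrative example (not in the original comments): for
    // vpermilvar.ps.256, a mask value of 1 in element 6 selects element 1 of
    // the upper 128-bit lane, i.e. generic shuffle index 4 + 1 = 5.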
1061  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
1062 
1063  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1064  }
1065 
1066  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1067  auto V1 = II.getArgOperand(0);
1068  auto V2 = UndefValue::get(V1->getType());
1069  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1070 }
1071 
1072 /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
static Value *simplifyX86vpermv(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
1075  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1076  if (!V)
1077  return nullptr;
1078 
1079  auto *VecTy = cast<VectorType>(II.getType());
1080  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1081  unsigned Size = VecTy->getNumElements();
1082  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
1083  "Unexpected shuffle mask size");
1084 
1085  // Construct a shuffle mask from constant integers or UNDEFs.
1086  Constant *Indexes[64] = {nullptr};
1087 
1088  for (unsigned I = 0; I < Size; ++I) {
1089  Constant *COp = V->getAggregateElement(I);
1090  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1091  return nullptr;
1092 
1093  if (isa<UndefValue>(COp)) {
1094  Indexes[I] = UndefValue::get(MaskEltTy);
1095  continue;
1096  }
1097 
1098  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
1099  Index &= Size - 1;
1100  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1101  }
1102 
1103  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
1104  auto V1 = II.getArgOperand(0);
1105  auto V2 = UndefValue::get(VecTy);
1106  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1107 }
1108 
1109 /// Decode XOP integer vector comparison intrinsics.
static Value *simplifyX86vpcom(const IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder,
                               bool IsSigned) {
1113  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
1114  uint64_t Imm = CInt->getZExtValue() & 0x7;
1115  VectorType *VecTy = cast<VectorType>(II.getType());
    CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;

1118  switch (Imm) {
1119  case 0x0:
1120  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1121  break;
1122  case 0x1:
1123  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1124  break;
1125  case 0x2:
1126  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1127  break;
1128  case 0x3:
1129  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1130  break;
1131  case 0x4:
1132  Pred = ICmpInst::ICMP_EQ; break;
1133  case 0x5:
1134  Pred = ICmpInst::ICMP_NE; break;
1135  case 0x6:
1136  return ConstantInt::getSigned(VecTy, 0); // FALSE
1137  case 0x7:
1138  return ConstantInt::getSigned(VecTy, -1); // TRUE
1139  }
1140 
1141  if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
1142  II.getArgOperand(1)))
1143  return Builder.CreateSExtOrTrunc(Cmp, VecTy);
1144  }
1145  return nullptr;
1146 }
1147 
1148 // Emit a select instruction and appropriate bitcasts to help simplify
1149 // masked intrinsics.
static Value *emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1,
                                InstCombiner::BuilderTy &Builder) {
1152  unsigned VWidth = Op0->getType()->getVectorNumElements();
1153 
  // If the mask is all ones we don't need the select. But we need to check
  // only the bits that will be used in case VWidth is less than 8.
1156  if (auto *C = dyn_cast<ConstantInt>(Mask))
1157  if (C->getValue().zextOrTrunc(VWidth).isAllOnesValue())
1158  return Op0;
1159 
1160  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
1161  cast<IntegerType>(Mask->getType())->getBitWidth());
1162  Mask = Builder.CreateBitCast(Mask, MaskTy);
1163 
1164  // If we have less than 8 elements, then the starting mask was an i8 and
1165  // we need to extract down to the right number of elements.
1166  if (VWidth < 8) {
1167  uint32_t Indices[4];
1168  for (unsigned i = 0; i != VWidth; ++i)
1169  Indices[i] = i;
1170  Mask = Builder.CreateShuffleVector(Mask, Mask,
1171  makeArrayRef(Indices, VWidth),
1172  "extract");
1173  }
1174 
1175  return Builder.CreateSelect(Mask, Op0, Op1);
1176 }
1177 
static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
  Value *Arg0 = II.getArgOperand(0);
1180  Value *Arg1 = II.getArgOperand(1);
1181 
1182  // fmin(x, x) -> x
1183  if (Arg0 == Arg1)
1184  return Arg0;
1185 
1186  const auto *C1 = dyn_cast<ConstantFP>(Arg1);
1187 
1188  // fmin(x, nan) -> x
1189  if (C1 && C1->isNaN())
1190  return Arg0;
1191 
  // If one operand is undef we can return the other operand: even if undef
  // were a NaN, minnum/maxnum would return the other value, and a NaN can
  // only be returned when both operands are NaN.
1194  //
1195  // fmin(undef, x) -> x
1196  if (isa<UndefValue>(Arg0))
1197  return Arg1;
1198 
1199  // fmin(x, undef) -> x
1200  if (isa<UndefValue>(Arg1))
1201  return Arg0;
1202 
1203  Value *X = nullptr;
1204  Value *Y = nullptr;
1205  if (II.getIntrinsicID() == Intrinsic::minnum) {
1206  // fmin(x, fmin(x, y)) -> fmin(x, y)
1207  // fmin(y, fmin(x, y)) -> fmin(x, y)
1208  if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
1209  if (Arg0 == X || Arg0 == Y)
1210  return Arg1;
1211  }
1212 
1213  // fmin(fmin(x, y), x) -> fmin(x, y)
1214  // fmin(fmin(x, y), y) -> fmin(x, y)
1215  if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
1216  if (Arg1 == X || Arg1 == Y)
1217  return Arg0;
1218  }
1219 
1220  // TODO: fmin(nnan x, inf) -> x
1221  // TODO: fmin(nnan ninf x, flt_max) -> x
1222  if (C1 && C1->isInfinity()) {
1223  // fmin(x, -inf) -> -inf
1224  if (C1->isNegative())
1225  return Arg1;
1226  }
1227  } else {
1229  // fmax(x, fmax(x, y)) -> fmax(x, y)
1230  // fmax(y, fmax(x, y)) -> fmax(x, y)
1231  if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
1232  if (Arg0 == X || Arg0 == Y)
1233  return Arg1;
1234  }
1235 
1236  // fmax(fmax(x, y), x) -> fmax(x, y)
1237  // fmax(fmax(x, y), y) -> fmax(x, y)
1238  if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
1239  if (Arg1 == X || Arg1 == Y)
1240  return Arg0;
1241  }
1242 
1243  // TODO: fmax(nnan x, -inf) -> x
1244  // TODO: fmax(nnan ninf x, -flt_max) -> x
1245  if (C1 && C1->isInfinity()) {
1246  // fmax(x, inf) -> inf
1247  if (!C1->isNegative())
1248  return Arg1;
1249  }
1250  }
1251  return nullptr;
1252 }
1253 
static bool maskIsAllOneOrUndef(Value *Mask) {
  auto *ConstMask = dyn_cast<Constant>(Mask);
1256  if (!ConstMask)
1257  return false;
1258  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1259  return true;
1260  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
1261  ++I) {
1262  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1263  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1264  continue;
1265  return false;
1266  }
1267  return true;
1268 }
1269 
static Value *simplifyMaskedLoad(const IntrinsicInst &II,
                                 InstCombiner::BuilderTy &Builder) {
1272  // If the mask is all ones or undefs, this is a plain vector load of the 1st
1273  // argument.
1274  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
1275  Value *LoadPtr = II.getArgOperand(0);
1276  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
1277  return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
1278  }
1279 
1280  return nullptr;
1281 }
1282 
static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1285  if (!ConstMask)
1286  return nullptr;
1287 
1288  // If the mask is all zeros, this instruction does nothing.
1289  if (ConstMask->isNullValue())
1290  return IC.eraseInstFromFunction(II);
1291 
1292  // If the mask is all ones, this is a plain vector store of the 1st argument.
1293  if (ConstMask->isAllOnesValue()) {
1294  Value *StorePtr = II.getArgOperand(1);
1295  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
1296  return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
1297  }
1298 
1299  return nullptr;
1300 }
1301 
static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
  // If the mask is all zeros, return the "passthru" argument of the gather.
1304  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
1305  if (ConstMask && ConstMask->isNullValue())
1306  return IC.replaceInstUsesWith(II, II.getArgOperand(3));
1307 
1308  return nullptr;
1309 }
1310 
static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
  // If the mask is all zeros, a scatter does nothing.
1313  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1314  if (ConstMask && ConstMask->isNullValue())
1315  return IC.eraseInstFromFunction(II);
1316 
1317  return nullptr;
1318 }
1319 
static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
  assert((II.getIntrinsicID() == Intrinsic::cttz ||
1322  II.getIntrinsicID() == Intrinsic::ctlz) &&
1323  "Expected cttz or ctlz intrinsic");
1324  Value *Op0 = II.getArgOperand(0);
1325 
1326  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
1327 
1328  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
1329  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
1330  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
1331  : Known.countMaxLeadingZeros();
1332  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
1333  : Known.countMinLeadingZeros();
1334 
1335  // If all bits above (ctlz) or below (cttz) the first known one are known
1336  // zero, this value is constant.
1337  // FIXME: This should be in InstSimplify because we're replacing an
1338  // instruction with a constant.
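  // Illustrative example (not in the original comments): if the known bits of
  // Op0 are xxxx1000 (bit 3 known one, bits 0-2 known zero), then cttz(Op0)
  // is exactly 3, because the minimum and maximum possible trailing zero
  // counts coincide.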
1339  if (PossibleZeros == DefiniteZeros) {
1340  auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
1341  return IC.replaceInstUsesWith(II, C);
1342  }
1343 
1344  // If the input to cttz/ctlz is known to be non-zero,
1345  // then change the 'ZeroIsUndef' parameter to 'true'
1346  // because we know the zero behavior can't affect the result.
1347  if (!Known.One.isNullValue() ||
1348  isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
1349  &IC.getDominatorTree())) {
1350  if (!match(II.getArgOperand(1), m_One())) {
1351  II.setOperand(1, IC.Builder.getTrue());
1352  return &II;
1353  }
1354  }
1355 
1356  // Add range metadata since known bits can't completely reflect what we know.
1357  // TODO: Handle splat vectors.
1358  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1359  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1360  Metadata *LowAndHigh[] = {
1361  ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
1362  ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
1366  }
1367 
1368  return nullptr;
1369 }
1370 
static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
1373  "Expected ctpop intrinsic");
1374  Value *Op0 = II.getArgOperand(0);
1375  // FIXME: Try to simplify vectors of integers.
1376  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1377  if (!IT)
1378  return nullptr;
1379 
1380  unsigned BitWidth = IT->getBitWidth();
1381  KnownBits Known(BitWidth);
1382  IC.computeKnownBits(Op0, Known, 0, &II);
1383 
1384  unsigned MinCount = Known.countMinPopulation();
1385  unsigned MaxCount = Known.countMaxPopulation();
1386 
1387  // Add range metadata since known bits can't completely reflect what we know.
1388  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
    Metadata *LowAndHigh[] = {
        ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
        ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
1395  }
1396 
1397  return nullptr;
1398 }
1399 
1400 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1401 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1402 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
1405  Value *Mask = II.getOperand(1);
1406  Constant *ZeroVec = Constant::getNullValue(II.getType());
1407 
1408  // Special case a zero mask since that's not a ConstantDataVector.
1409  // This masked load instruction creates a zero vector.
1410  if (isa<ConstantAggregateZero>(Mask))
1411  return IC.replaceInstUsesWith(II, ZeroVec);
1412 
1413  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1414  if (!ConstMask)
1415  return nullptr;
1416 
  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
  // to allow target-independent optimizations.
1419 
1420  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1421  // the LLVM intrinsic definition for the pointer argument.
1422  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1423  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
1424  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1425 
1426  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1427  // on each element's most significant bit (the sign bit).
1428  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1429 
1430  // The pass-through vector for an x86 masked load is a zero vector.
1431  CallInst *NewMaskedLoad =
1432  IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
1433  return IC.replaceInstUsesWith(II, NewMaskedLoad);
1434 }
1435 
1436 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1437 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1438 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
1441  Value *Mask = II.getOperand(1);
1442  Value *Vec = II.getOperand(2);
1443 
1444  // Special case a zero mask since that's not a ConstantDataVector:
1445  // this masked store instruction does nothing.
1446  if (isa<ConstantAggregateZero>(Mask)) {
1447  IC.eraseInstFromFunction(II);
1448  return true;
1449  }
1450 
1451  // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
1452  // anything else at this level.
1453  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
1454  return false;
1455 
1456  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1457  if (!ConstMask)
1458  return false;
1459 
  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
  // to allow target-independent optimizations.
1462 
1463  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1464  // the LLVM intrinsic definition for the pointer argument.
1465  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1466  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
1467  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1468 
1469  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1470  // on each element's most significant bit (the sign bit).
1471  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1472 
1473  IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
1474 
1475  // 'Replace uses' doesn't work for stores. Erase the original masked store.
1476  IC.eraseInstFromFunction(II);
1477  return true;
1478 }
1479 
1480 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
1481 //
1482 // A single NaN input is folded to minnum, so we rely on that folding for
1483 // handling NaNs.
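// Illustrative example (not from the original comments): fmed3(1.0, 5.0, 3.0)
// computes Max3 = 5.0, which compares equal to Src1, so the result is
// maxnum(Src0, Src2) = 3.0, the median of the three inputs.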
1484 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
1485  const APFloat &Src2) {
1486  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
1487 
1488  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
1489  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
1490  if (Cmp0 == APFloat::cmpEqual)
1491  return maxnum(Src1, Src2);
1492 
1493  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
1494  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
1495  if (Cmp1 == APFloat::cmpEqual)
1496  return maxnum(Src0, Src2);
1497 
1498  return maxnum(Src0, Src1);
1499 }
1500 
1501 // Returns true iff the 2 intrinsics have the same operands, limiting the
1502 // comparison to the first NumOperands.
1503 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
1504  unsigned NumOperands) {
1505  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
1506  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
1507  for (unsigned i = 0; i < NumOperands; i++)
1508  if (I.getArgOperand(i) != E.getArgOperand(i))
1509  return false;
1510  return true;
1511 }
1512 
1513 // Remove trivially empty start/end intrinsic ranges, i.e. a start
1514 // immediately followed by an end (ignoring debuginfo or other
1515 // start/end intrinsics in between). As this handles only the most trivial
1516 // cases, tracking the nesting level is not needed:
1517 //
1518 // call @llvm.foo.start(i1 0) ; &I
1519 // call @llvm.foo.start(i1 0)
1520 // call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
1521 // call @llvm.foo.end(i1 0)
1522 static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
1523  unsigned EndID, InstCombiner &IC) {
1524  assert(I.getIntrinsicID() == StartID &&
1525  "Start intrinsic does not have expected ID");
1526  BasicBlock::iterator BI(I), BE(I.getParent()->end());
1527  for (++BI; BI != BE; ++BI) {
1528  if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
1529  if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
1530  continue;
1531  if (E->getIntrinsicID() == EndID &&
1532  haveSameOperands(I, *E, E->getNumArgOperands())) {
1533  IC.eraseInstFromFunction(*E);
1534  IC.eraseInstFromFunction(I);
1535  return true;
1536  }
1537  }
1538  break;
1539  }
1540 
1541  return false;
1542 }
1543 
1544 // Convert NVVM intrinsics to target-generic LLVM code where possible.
static Instruction *simplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
  // Each NVVM intrinsic we can simplify can be replaced with one of:
1547  //
1548  // * an LLVM intrinsic,
1549  // * an LLVM cast operation,
1550  // * an LLVM binary operation, or
1551  // * ad-hoc LLVM IR for the particular operation.
1552 
1553  // Some transformations are only valid when the module's
1554  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
1555  // transformations are valid regardless of the module's ftz setting.
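  // For example, an ftz variant such as nvvm.floor.ftz.f flushes denormals, so
  // it can only be mapped to the generic llvm.floor when the module-level ftz
  // setting makes the generic operation flush denormals too (FTZ_MustBeOn).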
1556  enum FtzRequirementTy {
1557  FTZ_Any, // Any ftz setting is ok.
1558  FTZ_MustBeOn, // Transformation is valid only if ftz is on.
1559  FTZ_MustBeOff, // Transformation is valid only if ftz is off.
1560  };
1561  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
1562  // target-generic intrinsic, cast op, or binary op but that we can nonetheless
1563  // simplify.
1564  enum SpecialCase {
1565  SPC_Reciprocal,
1566  };
1567 
1568  // SimplifyAction is a poor-man's variant (plus an additional flag) that
1569  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
1570  struct SimplifyAction {
1571  // Invariant: At most one of these Optionals has a value.
1572  Optional<Intrinsic::ID> IID;
1573  Optional<Instruction::CastOps> CastOp;
1574  Optional<Instruction::BinaryOps> BinaryOp;
1575  Optional<SpecialCase> Special;
1576 
1577  FtzRequirementTy FtzRequirement = FTZ_Any;
1578 
1579  SimplifyAction() = default;
1580 
1581  SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
1582  : IID(IID), FtzRequirement(FtzReq) {}
1583 
1584  // Cast operations don't have anything to do with FTZ, so we skip that
1585  // argument.
1586  SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
1587 
1588  SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
1589  : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
1590 
1591  SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
1592  : Special(Special), FtzRequirement(FtzReq) {}
1593  };
1594 
1595  // Try to generate a SimplifyAction describing how to replace our
1596  // IntrinsicInstr with target-generic LLVM IR.
1597  const SimplifyAction Action = [II]() -> SimplifyAction {
1598  switch (II->getIntrinsicID()) {
1599  // NVVM intrinsics that map directly to LLVM intrinsics.
1600  case Intrinsic::nvvm_ceil_d:
1601  return {Intrinsic::ceil, FTZ_Any};
1602  case Intrinsic::nvvm_ceil_f:
1603  return {Intrinsic::ceil, FTZ_MustBeOff};
1604  case Intrinsic::nvvm_ceil_ftz_f:
1605  return {Intrinsic::ceil, FTZ_MustBeOn};
1606  case Intrinsic::nvvm_fabs_d:
1607  return {Intrinsic::fabs, FTZ_Any};
1608  case Intrinsic::nvvm_fabs_f:
1609  return {Intrinsic::fabs, FTZ_MustBeOff};
1610  case Intrinsic::nvvm_fabs_ftz_f:
1611  return {Intrinsic::fabs, FTZ_MustBeOn};
1612  case Intrinsic::nvvm_floor_d:
1613  return {Intrinsic::floor, FTZ_Any};
1614  case Intrinsic::nvvm_floor_f:
1615  return {Intrinsic::floor, FTZ_MustBeOff};
1616  case Intrinsic::nvvm_floor_ftz_f:
1617  return {Intrinsic::floor, FTZ_MustBeOn};
1618  case Intrinsic::nvvm_fma_rn_d:
1619  return {Intrinsic::fma, FTZ_Any};
1620  case Intrinsic::nvvm_fma_rn_f:
1621  return {Intrinsic::fma, FTZ_MustBeOff};
1622  case Intrinsic::nvvm_fma_rn_ftz_f:
1623  return {Intrinsic::fma, FTZ_MustBeOn};
1624  case Intrinsic::nvvm_fmax_d:
1625  return {Intrinsic::maxnum, FTZ_Any};
1626  case Intrinsic::nvvm_fmax_f:
1627  return {Intrinsic::maxnum, FTZ_MustBeOff};
1628  case Intrinsic::nvvm_fmax_ftz_f:
1629  return {Intrinsic::maxnum, FTZ_MustBeOn};
1630  case Intrinsic::nvvm_fmin_d:
1631  return {Intrinsic::minnum, FTZ_Any};
1632  case Intrinsic::nvvm_fmin_f:
1633  return {Intrinsic::minnum, FTZ_MustBeOff};
1634  case Intrinsic::nvvm_fmin_ftz_f:
1635  return {Intrinsic::minnum, FTZ_MustBeOn};
1636  case Intrinsic::nvvm_round_d:
1637  return {Intrinsic::round, FTZ_Any};
1638  case Intrinsic::nvvm_round_f:
1639  return {Intrinsic::round, FTZ_MustBeOff};
1640  case Intrinsic::nvvm_round_ftz_f:
1641  return {Intrinsic::round, FTZ_MustBeOn};
1642  case Intrinsic::nvvm_sqrt_rn_d:
1643  return {Intrinsic::sqrt, FTZ_Any};
1644  case Intrinsic::nvvm_sqrt_f:
1645  // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
1646  // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
1647  // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
1648  // the versions with explicit ftz-ness.
1649  return {Intrinsic::sqrt, FTZ_Any};
1650  case Intrinsic::nvvm_sqrt_rn_f:
1651  return {Intrinsic::sqrt, FTZ_MustBeOff};
1652  case Intrinsic::nvvm_sqrt_rn_ftz_f:
1653  return {Intrinsic::sqrt, FTZ_MustBeOn};
1654  case Intrinsic::nvvm_trunc_d:
1655  return {Intrinsic::trunc, FTZ_Any};
1656  case Intrinsic::nvvm_trunc_f:
1657  return {Intrinsic::trunc, FTZ_MustBeOff};
1658  case Intrinsic::nvvm_trunc_ftz_f:
1659  return {Intrinsic::trunc, FTZ_MustBeOn};
1660 
1661  // NVVM intrinsics that map to LLVM cast operations.
1662  //
1663  // Note that llvm's target-generic conversion operators correspond to the rz
1664  // (round to zero) versions of the nvvm conversion intrinsics, even though
1665  // most everything else here uses the rn (round to nearest even) nvvm ops.
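  // For example, nvvm.d2i.rz(%x) can become "fptosi double %x to i32", since
  // LLVM's fptosi also rounds toward zero.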
1666  case Intrinsic::nvvm_d2i_rz:
1667  case Intrinsic::nvvm_f2i_rz:
1668  case Intrinsic::nvvm_d2ll_rz:
1669  case Intrinsic::nvvm_f2ll_rz:
1670  return {Instruction::FPToSI};
1671  case Intrinsic::nvvm_d2ui_rz:
1672  case Intrinsic::nvvm_f2ui_rz:
1673  case Intrinsic::nvvm_d2ull_rz:
1674  case Intrinsic::nvvm_f2ull_rz:
1675  return {Instruction::FPToUI};
1676  case Intrinsic::nvvm_i2d_rz:
1677  case Intrinsic::nvvm_i2f_rz:
1678  case Intrinsic::nvvm_ll2d_rz:
1679  case Intrinsic::nvvm_ll2f_rz:
1680  return {Instruction::SIToFP};
1681  case Intrinsic::nvvm_ui2d_rz:
1682  case Intrinsic::nvvm_ui2f_rz:
1683  case Intrinsic::nvvm_ull2d_rz:
1684  case Intrinsic::nvvm_ull2f_rz:
1685  return {Instruction::UIToFP};
1686 
1687  // NVVM intrinsics that map to LLVM binary ops.
1688  case Intrinsic::nvvm_add_rn_d:
1689  return {Instruction::FAdd, FTZ_Any};
1690  case Intrinsic::nvvm_add_rn_f:
1691  return {Instruction::FAdd, FTZ_MustBeOff};
1692  case Intrinsic::nvvm_add_rn_ftz_f:
1693  return {Instruction::FAdd, FTZ_MustBeOn};
1694  case Intrinsic::nvvm_mul_rn_d:
1695  return {Instruction::FMul, FTZ_Any};
1696  case Intrinsic::nvvm_mul_rn_f:
1697  return {Instruction::FMul, FTZ_MustBeOff};
1698  case Intrinsic::nvvm_mul_rn_ftz_f:
1699  return {Instruction::FMul, FTZ_MustBeOn};
1700  case Intrinsic::nvvm_div_rn_d:
1701  return {Instruction::FDiv, FTZ_Any};
1702  case Intrinsic::nvvm_div_rn_f:
1703  return {Instruction::FDiv, FTZ_MustBeOff};
1704  case Intrinsic::nvvm_div_rn_ftz_f:
1705  return {Instruction::FDiv, FTZ_MustBeOn};
1706 
1707  // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
1708  // need special handling.
1709  //
1710  // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
1711  // as well.
1712  case Intrinsic::nvvm_rcp_rn_d:
1713  return {SPC_Reciprocal, FTZ_Any};
1714  case Intrinsic::nvvm_rcp_rn_f:
1715  return {SPC_Reciprocal, FTZ_MustBeOff};
1716  case Intrinsic::nvvm_rcp_rn_ftz_f:
1717  return {SPC_Reciprocal, FTZ_MustBeOn};
1718 
1719  // We do not currently simplify intrinsics that give an approximate answer.
1720  // These include:
1721  //
1722  // - nvvm_cos_approx_{f,ftz_f}
1723  // - nvvm_ex2_approx_{d,f,ftz_f}
1724  // - nvvm_lg2_approx_{d,f,ftz_f}
1725  // - nvvm_sin_approx_{f,ftz_f}
1726  // - nvvm_sqrt_approx_{f,ftz_f}
1727  // - nvvm_rsqrt_approx_{d,f,ftz_f}
1728  // - nvvm_div_approx_{ftz_d,ftz_f,f}
1729  // - nvvm_rcp_approx_ftz_d
1730  //
1731  // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
1732  // means that fastmath is enabled in the intrinsic. Unfortunately only
1733  // binary operators (currently) have a fastmath bit in SelectionDAG, so this
1734  // information gets lost and we can't select on it.
1735  //
1736  // TODO: div and rcp are lowered to a binary op, so these we could in theory
1737  // lower them to "fast fdiv".
1738 
1739  default:
1740  return {};
1741  }
1742  }();
1743 
1744  // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
1745  // can bail out now. (Notice that in the case that IID is not an NVVM
1746  // intrinsic, we don't have to look up any module metadata, as
1747  // FtzRequirementTy will be FTZ_Any.)
1748  if (Action.FtzRequirement != FTZ_Any) {
1749  bool FtzEnabled =
1750  II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
1751  "true";
1752 
1753  if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
1754  return nullptr;
1755  }
1756 
1757  // Simplify to target-generic intrinsic.
1758  if (Action.IID) {
1759  SmallVector<Value *, 4> Args(II->arg_operands());
1760  // All the target-generic intrinsics currently of interest to us have one
1761  // type argument, equal to that of the nvvm intrinsic's argument.
1762  Type *Tys[] = {II->getArgOperand(0)->getType()};
1763  return CallInst::Create(
1764  Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
1765  }
1766 
1767  // Simplify to target-generic binary op.
1768  if (Action.BinaryOp)
1769  return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
1770  II->getArgOperand(1), II->getName());
1771 
1772  // Simplify to target-generic cast op.
1773  if (Action.CastOp)
1774  return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
1775  II->getName());
1776 
1777  // All that's left are the special cases.
1778  if (!Action.Special)
1779  return nullptr;
1780 
1781  switch (*Action.Special) {
1782  case SPC_Reciprocal:
1783  // Simplify reciprocal.
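  // e.g. nvvm.rcp.rn.d(%x) becomes "fdiv double 1.0, %x".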
1784  return BinaryOperator::Create(
1785  Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
1786  II->getArgOperand(0), II->getName());
1787  }
1788  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
1789 }
1790 
1791 Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
1792  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
1793  return nullptr;
1794 }
1795 
1796 Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
1797  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
1798  return nullptr;
1799 }
1800 
1801 /// CallInst simplification. This mostly only handles folding of intrinsic
1802 /// instructions. For normal calls, it allows visitCallSite to do the heavy
1803 /// lifting.
1804 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
1805  auto Args = CI.arg_operands();
1806  if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(),
1807  Args.end(), SQ.getWithInstruction(&CI)))
1808  return replaceInstUsesWith(CI, V);
1809 
1810  if (isFreeCall(&CI, &TLI))
1811  return visitFree(CI);
1812 
1813  // If the caller function is nounwind, mark the call as nounwind, even if the
1814  // callee isn't.
1815  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1816  CI.setDoesNotThrow();
1817  return &CI;
1818  }
1819 
1820  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1821  if (!II) return visitCallSite(&CI);
1822 
1823  // Intrinsics cannot occur in an invoke, so handle them here instead of in
1824  // visitCallSite.
1825  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
1826  bool Changed = false;
1827 
1828  // memmove/cpy/set of zero bytes is a noop.
1829  if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
1830  if (NumBytes->isNullValue())
1831  return eraseInstFromFunction(CI);
1832 
1833  if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
1834  if (CI->getZExtValue() == 1) {
1835  // Replace the instruction with just byte operations. We would
1836  // transform other cases to loads/stores, but we don't know if
1837  // alignment is sufficient.
1838  }
1839  }
1840 
1841  // No other transformations apply to volatile transfers.
1842  if (MI->isVolatile())
1843  return nullptr;
1844 
1845  // If we have a memmove and the source operand is a constant global,
1846  // then the source and dest pointers can't alias, so we can change this
1847  // into a call to memcpy.
1848  if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
1849  if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1850  if (GVSrc->isConstant()) {
1851  Module *M = CI.getModule();
1852  Intrinsic::ID MemCpyID = Intrinsic::memcpy;
1853  Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1854  CI.getArgOperand(1)->getType(),
1855  CI.getArgOperand(2)->getType() };
1856  CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
1857  Changed = true;
1858  }
1859  }
1860 
1861  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1862  // memmove(x,x,size) -> noop.
1863  if (MTI->getSource() == MTI->getDest())
1864  return eraseInstFromFunction(CI);
1865  }
1866 
1867  // If we can determine a pointer alignment that is bigger than currently
1868  // set, update the alignment.
1869  if (isa<MemTransferInst>(MI)) {
1870  if (Instruction *I = SimplifyMemTransfer(MI))
1871  return I;
1872  } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
1873  if (Instruction *I = SimplifyMemSet(MSI))
1874  return I;
1875  }
1876 
1877  if (Changed) return II;
1878  }
1879 
1880  if (auto *AMI = dyn_cast<AtomicMemCpyInst>(II)) {
1881  if (Constant *C = dyn_cast<Constant>(AMI->getLength()))
1882  if (C->isNullValue())
1883  return eraseInstFromFunction(*AMI);
1884 
1885  if (Instruction *I = SimplifyElementUnorderedAtomicMemCpy(AMI))
1886  return I;
1887  }
1888 
1889  if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
1890  return I;
1891 
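  // Helper: ask SimplifyDemandedVectorElts to demand only the lowest
  // 'DemandedWidth' elements of a 'Width'-element vector operand, returning a
  // simplified value if the ignored upper elements let it fold away work.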
1892  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
1893  unsigned DemandedWidth) {
1894  APInt UndefElts(Width, 0);
1895  APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
1896  return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
1897  };
1898 
1899  switch (II->getIntrinsicID()) {
1900  default: break;
1901  case Intrinsic::objectsize:
1902  if (ConstantInt *N =
1903  lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
1904  return replaceInstUsesWith(CI, N);
1905  return nullptr;
1906 
1907  case Intrinsic::bswap: {
1908  Value *IIOperand = II->getArgOperand(0);
1909  Value *X = nullptr;
1910 
1911  // TODO should this be in InstSimplify?
1912  // bswap(bswap(x)) -> x
1913  if (match(IIOperand, m_BSwap(m_Value(X))))
1914  return replaceInstUsesWith(CI, X);
1915 
1916  // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
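  // For example, with i32 -> i16: C = 32 - 16 = 16, so
  // bswap(trunc(bswap(%x))) becomes trunc(lshr(%x, 16)).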
1917  if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
1918  unsigned C = X->getType()->getPrimitiveSizeInBits() -
1919  IIOperand->getType()->getPrimitiveSizeInBits();
1920  Value *CV = ConstantInt::get(X->getType(), C);
1921  Value *V = Builder.CreateLShr(X, CV);
1922  return new TruncInst(V, IIOperand->getType());
1923  }
1924  break;
1925  }
1926 
1927  case Intrinsic::bitreverse: {
1928  Value *IIOperand = II->getArgOperand(0);
1929  Value *X = nullptr;
1930 
1931  // TODO should this be in InstSimplify?
1932  // bitreverse(bitreverse(x)) -> x
1933  if (match(IIOperand, m_BitReverse(m_Value(X))))
1934  return replaceInstUsesWith(CI, X);
1935  break;
1936  }
1937 
1938  case Intrinsic::masked_load:
1939  if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
1940  return replaceInstUsesWith(CI, SimplifiedMaskedOp);
1941  break;
1942  case Intrinsic::masked_store:
1943  return simplifyMaskedStore(*II, *this);
1944  case Intrinsic::masked_gather:
1945  return simplifyMaskedGather(*II, *this);
1946  case Intrinsic::masked_scatter:
1947  return simplifyMaskedScatter(*II, *this);
1948 
1949  case Intrinsic::powi:
1950  if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
1951  // powi(x, 0) -> 1.0
1952  if (Power->isZero())
1953  return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
1954  // powi(x, 1) -> x
1955  if (Power->isOne())
1956  return replaceInstUsesWith(CI, II->getArgOperand(0));
1957  // powi(x, -1) -> 1/x
1958  if (Power->isMinusOne())
1959  return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
1960  II->getArgOperand(0));
1961  }
1962  break;
1963 
1964  case Intrinsic::cttz:
1965  case Intrinsic::ctlz:
1966  if (auto *I = foldCttzCtlz(*II, *this))
1967  return I;
1968  break;
1969 
1970  case Intrinsic::ctpop:
1971  if (auto *I = foldCtpop(*II, *this))
1972  return I;
1973  break;
1974 
1975  case Intrinsic::uadd_with_overflow:
1976  case Intrinsic::sadd_with_overflow:
1977  case Intrinsic::umul_with_overflow:
1978  case Intrinsic::smul_with_overflow:
1979  if (isa<Constant>(II->getArgOperand(0)) &&
1980  !isa<Constant>(II->getArgOperand(1))) {
1981  // Canonicalize constants into the RHS.
1982  Value *LHS = II->getArgOperand(0);
1983  II->setArgOperand(0, II->getArgOperand(1));
1984  II->setArgOperand(1, LHS);
1985  return II;
1986  }
1987  LLVM_FALLTHROUGH;
1988 
1989  case Intrinsic::usub_with_overflow:
1990  case Intrinsic::ssub_with_overflow: {
1991  OverflowCheckFlavor OCF =
1992  IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
1993  assert(OCF != OCF_INVALID && "unexpected!");
1994 
1995  Value *OperationResult = nullptr;
1996  Constant *OverflowResult = nullptr;
1997  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
1998  *II, OperationResult, OverflowResult))
1999  return CreateOverflowTuple(II, OperationResult, OverflowResult);
2000 
2001  break;
2002  }
2003 
2004  case Intrinsic::minnum:
2005  case Intrinsic::maxnum: {
2006  Value *Arg0 = II->getArgOperand(0);
2007  Value *Arg1 = II->getArgOperand(1);
2008  // Canonicalize constants to the RHS.
2009  if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
2010  II->setArgOperand(0, Arg1);
2011  II->setArgOperand(1, Arg0);
2012  return II;
2013  }
2014  if (Value *V = simplifyMinnumMaxnum(*II))
2015  return replaceInstUsesWith(*II, V);
2016  break;
2017  }
2018  case Intrinsic::fmuladd: {
2019  // Canonicalize fast fmuladd to the separate fmul + fadd.
2020  if (II->isFast()) {
2021  BuilderTy::FastMathFlagGuard Guard(Builder);
2022  Builder.setFastMathFlags(II->getFastMathFlags());
2023  Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
2024  II->getArgOperand(1));
2025  Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
2026  Add->takeName(II);
2027  return replaceInstUsesWith(*II, Add);
2028  }
2029 
2030  LLVM_FALLTHROUGH;
2031  }
2032  case Intrinsic::fma: {
2033  Value *Src0 = II->getArgOperand(0);
2034  Value *Src1 = II->getArgOperand(1);
2035 
2036  // Canonicalize constants into the RHS.
2037  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
2038  II->setArgOperand(0, Src1);
2039  II->setArgOperand(1, Src0);
2040  std::swap(Src0, Src1);
2041  }
2042 
2043  Value *LHS = nullptr;
2044  Value *RHS = nullptr;
2045 
2046  // fma fneg(x), fneg(y), z -> fma x, y, z
2047  if (match(Src0, m_FNeg(m_Value(LHS))) &&
2048  match(Src1, m_FNeg(m_Value(RHS)))) {
2049  II->setArgOperand(0, LHS);
2050  II->setArgOperand(1, RHS);
2051  return II;
2052  }
2053 
2054  // fma fabs(x), fabs(x), z -> fma x, x, z
2055  if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
2056  match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
2057  II->setArgOperand(0, LHS);
2058  II->setArgOperand(1, RHS);
2059  return II;
2060  }
2061 
2062  // fma x, 1, z -> fadd x, z
2063  if (match(Src1, m_FPOne())) {
2064  Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
2065  RI->copyFastMathFlags(II);
2066  return RI;
2067  }
2068 
2069  break;
2070  }
2071  case Intrinsic::fabs: {
2072  Value *Cond;
2073  Constant *LHS, *RHS;
2074  if (match(II->getArgOperand(0),
2075  m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
2076  CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
2077  CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
2078  return SelectInst::Create(Cond, Call0, Call1);
2079  }
2080 
2081  LLVM_FALLTHROUGH;
2082  }
2083  case Intrinsic::ceil:
2084  case Intrinsic::floor:
2085  case Intrinsic::round:
2086  case Intrinsic::nearbyint:
2087  case Intrinsic::rint:
2088  case Intrinsic::trunc: {
2089  Value *ExtSrc;
2090  if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) &&
2091  II->getArgOperand(0)->hasOneUse()) {
2092  // fabs (fpext x) -> fpext (fabs x)
2093  Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(),
2094  { ExtSrc->getType() });
2095  CallInst *NewFabs = Builder.CreateCall(F, ExtSrc);
2096  NewFabs->copyFastMathFlags(II);
2097  NewFabs->takeName(II);
2098  return new FPExtInst(NewFabs, II->getType());
2099  }
2100 
2101  break;
2102  }
2103  case Intrinsic::cos:
2104  case Intrinsic::amdgcn_cos: {
2105  Value *SrcSrc;
2106  Value *Src = II->getArgOperand(0);
2107  if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
2108  match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
2109  // cos(-x) -> cos(x)
2110  // cos(fabs(x)) -> cos(x)
2111  II->setArgOperand(0, SrcSrc);
2112  return II;
2113  }
2114 
2115  break;
2116  }
2117  case Intrinsic::ppc_altivec_lvx:
2118  case Intrinsic::ppc_altivec_lvxl:
2119  // Turn PPC lvx -> load if the pointer is known aligned.
2120  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2121  &DT) >= 16) {
2122  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2123  PointerType::getUnqual(II->getType()));
2124  return new LoadInst(Ptr);
2125  }
2126  break;
2127  case Intrinsic::ppc_vsx_lxvw4x:
2128  case Intrinsic::ppc_vsx_lxvd2x: {
2129  // Turn PPC VSX loads into normal loads.
2130  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2131  PointerType::getUnqual(II->getType()));
2132  return new LoadInst(Ptr, Twine(""), false, 1);
2133  }
2134  case Intrinsic::ppc_altivec_stvx:
2135  case Intrinsic::ppc_altivec_stvxl:
2136  // Turn stvx -> store if the pointer is known aligned.
2137  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2138  &DT) >= 16) {
2139  Type *OpPtrTy =
2140  PointerType::getUnqual(II->getArgOperand(0)->getType());
2141  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2142  return new StoreInst(II->getArgOperand(0), Ptr);
2143  }
2144  break;
2145  case Intrinsic::ppc_vsx_stxvw4x:
2146  case Intrinsic::ppc_vsx_stxvd2x: {
2147  // Turn PPC VSX stores into normal stores.
2148  Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
2149  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2150  return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
2151  }
2152  case Intrinsic::ppc_qpx_qvlfs:
2153  // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
2154  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2155  &DT) >= 16) {
2156  Type *VTy = VectorType::get(Builder.getFloatTy(),
2157  II->getType()->getVectorNumElements());
2158  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2159  PointerType::getUnqual(VTy));
2160  Value *Load = Builder.CreateLoad(Ptr);
2161  return new FPExtInst(Load, II->getType());
2162  }
2163  break;
2164  case Intrinsic::ppc_qpx_qvlfd:
2165  // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
2166  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
2167  &DT) >= 32) {
2168  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2169  PointerType::getUnqual(II->getType()));
2170  return new LoadInst(Ptr);
2171  }
2172  break;
2173  case Intrinsic::ppc_qpx_qvstfs:
2174  // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
2175  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2176  &DT) >= 16) {
2177  Type *VTy = VectorType::get(Builder.getFloatTy(),
2178  II->getArgOperand(0)->getType()->getVectorNumElements());
2179  Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);
2180  Type *OpPtrTy = PointerType::getUnqual(VTy);
2181  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2182  return new StoreInst(TOp, Ptr);
2183  }
2184  break;
2185  case Intrinsic::ppc_qpx_qvstfd:
2186  // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
2187  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
2188  &DT) >= 32) {
2189  Type *OpPtrTy =
2190  PointerType::getUnqual(II->getArgOperand(0)->getType());
2191  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2192  return new StoreInst(II->getArgOperand(0), Ptr);
2193  }
2194  break;
2195 
2196  case Intrinsic::x86_bmi_bextr_32:
2197  case Intrinsic::x86_bmi_bextr_64:
2198  case Intrinsic::x86_tbm_bextri_u32:
2199  case Intrinsic::x86_tbm_bextri_u64:
2200  // If the RHS is a constant we can try some simplifications.
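  // The BEXTR/BEXTRI control operand encodes the starting bit position in
  // bits 7:0 and the number of bits to extract in bits 15:8.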
2201  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2202  uint64_t Shift = C->getZExtValue();
2203  uint64_t Length = (Shift >> 8) & 0xff;
2204  Shift &= 0xff;
2205  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2206  // If the length is 0 or the shift is out of range, replace with zero.
2207  if (Length == 0 || Shift >= BitWidth)
2208  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2209  // If the LHS is also a constant, we can completely constant fold this.
2210  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2211  uint64_t Result = InC->getZExtValue() >> Shift;
2212  if (Length > BitWidth)
2213  Length = BitWidth;
2214  Result &= maskTrailingOnes<uint64_t>(Length);
2215  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2216  }
2217  // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
2218  // are only masking bits that a shift already cleared?
2219  }
2220  break;
2221 
2222  case Intrinsic::x86_bmi_bzhi_32:
2223  case Intrinsic::x86_bmi_bzhi_64:
2224  // If the RHS is a constant we can try some simplifications.
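  // BZHI zeroes all bits at positions greater than or equal to the index held
  // in bits 7:0 of the second operand.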
2225  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2226  uint64_t Index = C->getZExtValue() & 0xff;
2227  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2228  if (Index >= BitWidth)
2229  return replaceInstUsesWith(CI, II->getArgOperand(0));
2230  if (Index == 0)
2231  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2232  // If the LHS is also a constant, we can completely constant fold this.
2233  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2234  uint64_t Result = InC->getZExtValue();
2235  Result &= maskTrailingOnes<uint64_t>(Index);
2236  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2237  }
2238  // TODO should we convert this to an AND if the RHS is constant?
2239  }
2240  break;
2241 
2242  case Intrinsic::x86_vcvtph2ps_128:
2243  case Intrinsic::x86_vcvtph2ps_256: {
2244  auto Arg = II->getArgOperand(0);
2245  auto ArgType = cast<VectorType>(Arg->getType());
2246  auto RetType = cast<VectorType>(II->getType());
2247  unsigned ArgWidth = ArgType->getNumElements();
2248  unsigned RetWidth = RetType->getNumElements();
2249  assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
2250  assert(ArgType->isIntOrIntVectorTy() &&
2251  ArgType->getScalarSizeInBits() == 16 &&
2252  "CVTPH2PS input type should be 16-bit integer vector");
2253  assert(RetType->getScalarType()->isFloatTy() &&
2254  "CVTPH2PS output type should be 32-bit float vector");
2255 
2256  // Constant folding: Convert to generic half to single conversion.
2257  if (isa<ConstantAggregateZero>(Arg))
2258  return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
2259 
2260  if (isa<ConstantDataVector>(Arg)) {
2261  auto VectorHalfAsShorts = Arg;
2262  if (RetWidth < ArgWidth) {
2263  SmallVector<uint32_t, 8> SubVecMask;
2264  for (unsigned i = 0; i != RetWidth; ++i)
2265  SubVecMask.push_back((int)i);
2266  VectorHalfAsShorts = Builder.CreateShuffleVector(
2267  Arg, UndefValue::get(ArgType), SubVecMask);
2268  }
2269 
2270  auto VectorHalfType =
2271  VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
2272  auto VectorHalfs =
2273  Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType);
2274  auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType);
2275  return replaceInstUsesWith(*II, VectorFloats);
2276  }
2277 
2278  // We only use the lowest lanes of the argument.
2279  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
2280  II->setArgOperand(0, V);
2281  return II;
2282  }
2283  break;
2284  }
2285 
2286  case Intrinsic::x86_sse_cvtss2si:
2287  case Intrinsic::x86_sse_cvtss2si64:
2288  case Intrinsic::x86_sse_cvttss2si:
2289  case Intrinsic::x86_sse_cvttss2si64:
2290  case Intrinsic::x86_sse2_cvtsd2si:
2291  case Intrinsic::x86_sse2_cvtsd2si64:
2292  case Intrinsic::x86_sse2_cvttsd2si:
2293  case Intrinsic::x86_sse2_cvttsd2si64:
2294  case Intrinsic::x86_avx512_vcvtss2si32:
2295  case Intrinsic::x86_avx512_vcvtss2si64:
2296  case Intrinsic::x86_avx512_vcvtss2usi32:
2297  case Intrinsic::x86_avx512_vcvtss2usi64:
2298  case Intrinsic::x86_avx512_vcvtsd2si32:
2299  case Intrinsic::x86_avx512_vcvtsd2si64:
2300  case Intrinsic::x86_avx512_vcvtsd2usi32:
2301  case Intrinsic::x86_avx512_vcvtsd2usi64:
2302  case Intrinsic::x86_avx512_cvttss2si:
2303  case Intrinsic::x86_avx512_cvttss2si64:
2304  case Intrinsic::x86_avx512_cvttss2usi:
2305  case Intrinsic::x86_avx512_cvttss2usi64:
2306  case Intrinsic::x86_avx512_cvttsd2si:
2307  case Intrinsic::x86_avx512_cvttsd2si64:
2308  case Intrinsic::x86_avx512_cvttsd2usi:
2309  case Intrinsic::x86_avx512_cvttsd2usi64: {
2310  // These intrinsics only demand the 0th element of their input vectors. If
2311  // we can simplify the input based on that, do so now.
2312  Value *Arg = II->getArgOperand(0);
2313  unsigned VWidth = Arg->getType()->getVectorNumElements();
2314  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2315  II->setArgOperand(0, V);
2316  return II;
2317  }
2318  break;
2319  }
2320 
2321  case Intrinsic::x86_mmx_pmovmskb:
2322  case Intrinsic::x86_sse_movmsk_ps:
2323  case Intrinsic::x86_sse2_movmsk_pd:
2324  case Intrinsic::x86_sse2_pmovmskb_128:
2325  case Intrinsic::x86_avx_movmsk_pd_256:
2326  case Intrinsic::x86_avx_movmsk_ps_256:
2327  case Intrinsic::x86_avx2_pmovmskb:
2328  if (Value *V = simplifyX86movmsk(*II))
2329  return replaceInstUsesWith(*II, V);
2330  break;
2331 
2332  case Intrinsic::x86_sse_comieq_ss:
2333  case Intrinsic::x86_sse_comige_ss:
2334  case Intrinsic::x86_sse_comigt_ss:
2335  case Intrinsic::x86_sse_comile_ss:
2336  case Intrinsic::x86_sse_comilt_ss:
2337  case Intrinsic::x86_sse_comineq_ss:
2338  case Intrinsic::x86_sse_ucomieq_ss:
2339  case Intrinsic::x86_sse_ucomige_ss:
2340  case Intrinsic::x86_sse_ucomigt_ss:
2341  case Intrinsic::x86_sse_ucomile_ss:
2342  case Intrinsic::x86_sse_ucomilt_ss:
2343  case Intrinsic::x86_sse_ucomineq_ss:
2344  case Intrinsic::x86_sse2_comieq_sd:
2345  case Intrinsic::x86_sse2_comige_sd:
2346  case Intrinsic::x86_sse2_comigt_sd:
2347  case Intrinsic::x86_sse2_comile_sd:
2348  case Intrinsic::x86_sse2_comilt_sd:
2349  case Intrinsic::x86_sse2_comineq_sd:
2350  case Intrinsic::x86_sse2_ucomieq_sd:
2351  case Intrinsic::x86_sse2_ucomige_sd:
2352  case Intrinsic::x86_sse2_ucomigt_sd:
2353  case Intrinsic::x86_sse2_ucomile_sd:
2354  case Intrinsic::x86_sse2_ucomilt_sd:
2355  case Intrinsic::x86_sse2_ucomineq_sd:
2356  case Intrinsic::x86_avx512_vcomi_ss:
2357  case Intrinsic::x86_avx512_vcomi_sd:
2358  case Intrinsic::x86_avx512_mask_cmp_ss:
2359  case Intrinsic::x86_avx512_mask_cmp_sd: {
2360  // These intrinsics only demand the 0th element of their input vectors. If
2361  // we can simplify the input based on that, do so now.
2362  bool MadeChange = false;
2363  Value *Arg0 = II->getArgOperand(0);
2364  Value *Arg1 = II->getArgOperand(1);
2365  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2366  if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2367  II->setArgOperand(0, V);
2368  MadeChange = true;
2369  }
2370  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2371  II->setArgOperand(1, V);
2372  MadeChange = true;
2373  }
2374  if (MadeChange)
2375  return II;
2376  break;
2377  }
2378  case Intrinsic::x86_avx512_mask_cmp_pd_128:
2379  case Intrinsic::x86_avx512_mask_cmp_pd_256:
2380  case Intrinsic::x86_avx512_mask_cmp_pd_512:
2381  case Intrinsic::x86_avx512_mask_cmp_ps_128:
2382  case Intrinsic::x86_avx512_mask_cmp_ps_256:
2383  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
2384  // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
2385  Value *Arg0 = II->getArgOperand(0);
2386  Value *Arg1 = II->getArgOperand(1);
2387  bool Arg0IsZero = match(Arg0, m_Zero());
2388  if (Arg0IsZero)
2389  std::swap(Arg0, Arg1);
2390  Value *A, *B;
2391  // This fold requires only NINF (no +/- inf), since inf minus inf
2392  // is nan.
2393  // NSZ(No Signed Zeros) is not needed because zeros of any sign are
2394  // equal for both compares.
2395  // NNAN is not needed because nans compare the same for both compares.
2396  // The compare intrinsic uses the above assumptions and therefore
2397  // doesn't require additional flags.
2398  if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
2399  match(Arg1, m_Zero()) &&
2400  cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
2401  if (Arg0IsZero)
2402  std::swap(A, B);
2403  II->setArgOperand(0, A);
2404  II->setArgOperand(1, B);
2405  return II;
2406  }
2407  break;
2408  }
2409 
2410  case Intrinsic::x86_avx512_mask_add_ps_512:
2411  case Intrinsic::x86_avx512_mask_div_ps_512:
2412  case Intrinsic::x86_avx512_mask_mul_ps_512:
2413  case Intrinsic::x86_avx512_mask_sub_ps_512:
2414  case Intrinsic::x86_avx512_mask_add_pd_512:
2415  case Intrinsic::x86_avx512_mask_div_pd_512:
2416  case Intrinsic::x86_avx512_mask_mul_pd_512:
2417  case Intrinsic::x86_avx512_mask_sub_pd_512:
2418  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2419  // IR operations.
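  // (4 is _MM_FROUND_CUR_DIRECTION, i.e. "use the current MXCSR rounding
  // mode", which matches the default FP environment assumed by LLVM IR.)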
2420  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2421  if (R->getValue() == 4) {
2422  Value *Arg0 = II->getArgOperand(0);
2423  Value *Arg1 = II->getArgOperand(1);
2424 
2425  Value *V;
2426  switch (II->getIntrinsicID()) {
2427  default: llvm_unreachable("Case stmts out of sync!");
2428  case Intrinsic::x86_avx512_mask_add_ps_512:
2429  case Intrinsic::x86_avx512_mask_add_pd_512:
2430  V = Builder.CreateFAdd(Arg0, Arg1);
2431  break;
2432  case Intrinsic::x86_avx512_mask_sub_ps_512:
2433  case Intrinsic::x86_avx512_mask_sub_pd_512:
2434  V = Builder.CreateFSub(Arg0, Arg1);
2435  break;
2436  case Intrinsic::x86_avx512_mask_mul_ps_512:
2437  case Intrinsic::x86_avx512_mask_mul_pd_512:
2438  V = Builder.CreateFMul(Arg0, Arg1);
2439  break;
2440  case Intrinsic::x86_avx512_mask_div_ps_512:
2441  case Intrinsic::x86_avx512_mask_div_pd_512:
2442  V = Builder.CreateFDiv(Arg0, Arg1);
2443  break;
2444  }
2445 
2446  // Create a select for the masking.
2447  V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
2448  Builder);
2449  return replaceInstUsesWith(*II, V);
2450  }
2451  }
2452  break;
2453 
2454  case Intrinsic::x86_avx512_mask_add_ss_round:
2455  case Intrinsic::x86_avx512_mask_div_ss_round:
2456  case Intrinsic::x86_avx512_mask_mul_ss_round:
2457  case Intrinsic::x86_avx512_mask_sub_ss_round:
2458  case Intrinsic::x86_avx512_mask_add_sd_round:
2459  case Intrinsic::x86_avx512_mask_div_sd_round:
2460  case Intrinsic::x86_avx512_mask_mul_sd_round:
2461  case Intrinsic::x86_avx512_mask_sub_sd_round:
2462  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2463  // IR operations.
2464  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2465  if (R->getValue() == 4) {
2466  // Extract the element as scalars.
2467  Value *Arg0 = II->getArgOperand(0);
2468  Value *Arg1 = II->getArgOperand(1);
2469  Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
2470  Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
2471 
2472  Value *V;
2473  switch (II->getIntrinsicID()) {
2474  default: llvm_unreachable("Case stmts out of sync!");
2475  case Intrinsic::x86_avx512_mask_add_ss_round:
2476  case Intrinsic::x86_avx512_mask_add_sd_round:
2477  V = Builder.CreateFAdd(LHS, RHS);
2478  break;
2479  case Intrinsic::x86_avx512_mask_sub_ss_round:
2480  case Intrinsic::x86_avx512_mask_sub_sd_round:
2481  V = Builder.CreateFSub(LHS, RHS);
2482  break;
2483  case Intrinsic::x86_avx512_mask_mul_ss_round:
2484  case Intrinsic::x86_avx512_mask_mul_sd_round:
2485  V = Builder.CreateFMul(LHS, RHS);
2486  break;
2487  case Intrinsic::x86_avx512_mask_div_ss_round:
2488  case Intrinsic::x86_avx512_mask_div_sd_round:
2489  V = Builder.CreateFDiv(LHS, RHS);
2490  break;
2491  }
2492 
2493  // Handle the masking aspect of the intrinsic.
2494  Value *Mask = II->getArgOperand(3);
2495  auto *C = dyn_cast<ConstantInt>(Mask);
2496  // We don't need a select if we know the mask bit is a 1.
2497  if (!C || !C->getValue()[0]) {
2498  // Cast the mask to an i1 vector and then extract the lowest element.
2499  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
2500  cast<IntegerType>(Mask->getType())->getBitWidth());
2501  Mask = Builder.CreateBitCast(Mask, MaskTy);
2502  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2503  // Extract the lowest element from the passthru operand.
2504  Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2),
2505  (uint64_t)0);
2506  V = Builder.CreateSelect(Mask, V, Passthru);
2507  }
2508 
2509  // Insert the result back into the original argument 0.
2510  V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
2511 
2512  return replaceInstUsesWith(*II, V);
2513  }
2514  }
2515  break;
2516 
2517  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
2518  case Intrinsic::x86_avx512_mask_max_ss_round:
2519  case Intrinsic::x86_avx512_mask_min_ss_round:
2520  case Intrinsic::x86_avx512_mask_max_sd_round:
2521  case Intrinsic::x86_avx512_mask_min_sd_round:
2522  case Intrinsic::x86_avx512_mask_vfmadd_ss:
2523  case Intrinsic::x86_avx512_mask_vfmadd_sd:
2524  case Intrinsic::x86_avx512_maskz_vfmadd_ss:
2525  case Intrinsic::x86_avx512_maskz_vfmadd_sd:
2526  case Intrinsic::x86_avx512_mask3_vfmadd_ss:
2527  case Intrinsic::x86_avx512_mask3_vfmadd_sd:
2528  case Intrinsic::x86_avx512_mask3_vfmsub_ss:
2529  case Intrinsic::x86_avx512_mask3_vfmsub_sd:
2530  case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
2531  case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
2532  case Intrinsic::x86_fma_vfmadd_ss:
2533  case Intrinsic::x86_fma_vfmsub_ss:
2534  case Intrinsic::x86_fma_vfnmadd_ss:
2535  case Intrinsic::x86_fma_vfnmsub_ss:
2536  case Intrinsic::x86_fma_vfmadd_sd:
2537  case Intrinsic::x86_fma_vfmsub_sd:
2538  case Intrinsic::x86_fma_vfnmadd_sd:
2539  case Intrinsic::x86_fma_vfnmsub_sd:
2540  case Intrinsic::x86_sse_cmp_ss:
2541  case Intrinsic::x86_sse_min_ss:
2542  case Intrinsic::x86_sse_max_ss:
2543  case Intrinsic::x86_sse2_cmp_sd:
2544  case Intrinsic::x86_sse2_min_sd:
2545  case Intrinsic::x86_sse2_max_sd:
2546  case Intrinsic::x86_sse41_round_ss:
2547  case Intrinsic::x86_sse41_round_sd:
2548  case Intrinsic::x86_xop_vfrcz_ss:
2549  case Intrinsic::x86_xop_vfrcz_sd: {
2550  unsigned VWidth = II->getType()->getVectorNumElements();
2551  APInt UndefElts(VWidth, 0);
2552  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2553  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2554  if (V != II)
2555  return replaceInstUsesWith(*II, V);
2556  return II;
2557  }
2558  break;
2559  }
2560 
2561  // Constant fold ashr( <A x Bi>, Ci ).
2562  // Constant fold lshr( <A x Bi>, Ci ).
2563  // Constant fold shl( <A x Bi>, Ci ).
2564  case Intrinsic::x86_sse2_psrai_d:
2565  case Intrinsic::x86_sse2_psrai_w:
2566  case Intrinsic::x86_avx2_psrai_d:
2567  case Intrinsic::x86_avx2_psrai_w:
2568  case Intrinsic::x86_avx512_psrai_q_128:
2569  case Intrinsic::x86_avx512_psrai_q_256:
2570  case Intrinsic::x86_avx512_psrai_d_512:
2571  case Intrinsic::x86_avx512_psrai_q_512:
2572  case Intrinsic::x86_avx512_psrai_w_512:
2573  case Intrinsic::x86_sse2_psrli_d:
2574  case Intrinsic::x86_sse2_psrli_q:
2575  case Intrinsic::x86_sse2_psrli_w:
2576  case Intrinsic::x86_avx2_psrli_d:
2577  case Intrinsic::x86_avx2_psrli_q:
2578  case Intrinsic::x86_avx2_psrli_w:
2579  case Intrinsic::x86_avx512_psrli_d_512:
2580  case Intrinsic::x86_avx512_psrli_q_512:
2581  case Intrinsic::x86_avx512_psrli_w_512:
2582  case Intrinsic::x86_sse2_pslli_d:
2583  case Intrinsic::x86_sse2_pslli_q:
2584  case Intrinsic::x86_sse2_pslli_w:
2585  case Intrinsic::x86_avx2_pslli_d:
2586  case Intrinsic::x86_avx2_pslli_q:
2587  case Intrinsic::x86_avx2_pslli_w:
2588  case Intrinsic::x86_avx512_pslli_d_512:
2589  case Intrinsic::x86_avx512_pslli_q_512:
2590  case Intrinsic::x86_avx512_pslli_w_512:
2591  if (Value *V = simplifyX86immShift(*II, Builder))
2592  return replaceInstUsesWith(*II, V);
2593  break;
2594 
2595  case Intrinsic::x86_sse2_psra_d:
2596  case Intrinsic::x86_sse2_psra_w:
2597  case Intrinsic::x86_avx2_psra_d:
2598  case Intrinsic::x86_avx2_psra_w:
2599  case Intrinsic::x86_avx512_psra_q_128:
2600  case Intrinsic::x86_avx512_psra_q_256:
2601  case Intrinsic::x86_avx512_psra_d_512:
2602  case Intrinsic::x86_avx512_psra_q_512:
2603  case Intrinsic::x86_avx512_psra_w_512:
2604  case Intrinsic::x86_sse2_psrl_d:
2605  case Intrinsic::x86_sse2_psrl_q:
2606  case Intrinsic::x86_sse2_psrl_w:
2607  case Intrinsic::x86_avx2_psrl_d:
2608  case Intrinsic::x86_avx2_psrl_q:
2609  case Intrinsic::x86_avx2_psrl_w:
2610  case Intrinsic::x86_avx512_psrl_d_512:
2611  case Intrinsic::x86_avx512_psrl_q_512:
2612  case Intrinsic::x86_avx512_psrl_w_512:
2613  case Intrinsic::x86_sse2_psll_d:
2614  case Intrinsic::x86_sse2_psll_q:
2615  case Intrinsic::x86_sse2_psll_w:
2616  case Intrinsic::x86_avx2_psll_d:
2617  case Intrinsic::x86_avx2_psll_q:
2618  case Intrinsic::x86_avx2_psll_w:
2619  case Intrinsic::x86_avx512_psll_d_512:
2620  case Intrinsic::x86_avx512_psll_q_512:
2621  case Intrinsic::x86_avx512_psll_w_512: {
2622  if (Value *V = simplifyX86immShift(*II, Builder))
2623  return replaceInstUsesWith(*II, V);
2624 
2625  // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
2626  // operand to compute the shift amount.
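  // Hence only the low half of Arg1's elements (the low 64 bits) are demanded
  // below.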
2627  Value *Arg1 = II->getArgOperand(1);
2628  assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
2629  "Unexpected packed shift size");
2630  unsigned VWidth = Arg1->getType()->getVectorNumElements();
2631 
2632  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2633  II->setArgOperand(1, V);
2634  return II;
2635  }
2636  break;
2637  }
2638 
2639  case Intrinsic::x86_avx2_psllv_d:
2640  case Intrinsic::x86_avx2_psllv_d_256:
2641  case Intrinsic::x86_avx2_psllv_q:
2642  case Intrinsic::x86_avx2_psllv_q_256:
2643  case Intrinsic::x86_avx512_psllv_d_512:
2644  case Intrinsic::x86_avx512_psllv_q_512:
2645  case Intrinsic::x86_avx512_psllv_w_128:
2646  case Intrinsic::x86_avx512_psllv_w_256:
2647  case Intrinsic::x86_avx512_psllv_w_512:
2648  case Intrinsic::x86_avx2_psrav_d:
2649  case Intrinsic::x86_avx2_psrav_d_256:
2650  case Intrinsic::x86_avx512_psrav_q_128:
2651  case Intrinsic::x86_avx512_psrav_q_256:
2652  case Intrinsic::x86_avx512_psrav_d_512:
2653  case Intrinsic::x86_avx512_psrav_q_512:
2654  case Intrinsic::x86_avx512_psrav_w_128:
2655  case Intrinsic::x86_avx512_psrav_w_256:
2656  case Intrinsic::x86_avx512_psrav_w_512:
2657  case Intrinsic::x86_avx2_psrlv_d:
2658  case Intrinsic::x86_avx2_psrlv_d_256:
2659  case Intrinsic::x86_avx2_psrlv_q:
2660  case Intrinsic::x86_avx2_psrlv_q_256:
2661  case Intrinsic::x86_avx512_psrlv_d_512:
2662  case Intrinsic::x86_avx512_psrlv_q_512:
2663  case Intrinsic::x86_avx512_psrlv_w_128:
2664  case Intrinsic::x86_avx512_psrlv_w_256:
2665  case Intrinsic::x86_avx512_psrlv_w_512:
2666  if (Value *V = simplifyX86varShift(*II, Builder))
2667  return replaceInstUsesWith(*II, V);
2668  break;
2669 
2670  case Intrinsic::x86_sse2_pmulu_dq:
2671  case Intrinsic::x86_sse41_pmuldq:
2672  case Intrinsic::x86_avx2_pmul_dq:
2673  case Intrinsic::x86_avx2_pmulu_dq:
2674  case Intrinsic::x86_avx512_pmul_dq_512:
2675  case Intrinsic::x86_avx512_pmulu_dq_512: {
2676  if (Value *V = simplifyX86muldq(*II, Builder))
2677  return replaceInstUsesWith(*II, V);
2678 
2679  unsigned VWidth = II->getType()->getVectorNumElements();
2680  APInt UndefElts(VWidth, 0);
2681  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
2682  if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) {
2683  if (V != II)
2684  return replaceInstUsesWith(*II, V);
2685  return II;
2686  }
2687  break;
2688  }
2689 
2690  case Intrinsic::x86_sse2_packssdw_128:
2691  case Intrinsic::x86_sse2_packsswb_128:
2692  case Intrinsic::x86_avx2_packssdw:
2693  case Intrinsic::x86_avx2_packsswb:
2694  case Intrinsic::x86_avx512_packssdw_512:
2695  case Intrinsic::x86_avx512_packsswb_512:
2696  if (Value *V = simplifyX86pack(*II, true))
2697  return replaceInstUsesWith(*II, V);
2698  break;
2699 
2700  case Intrinsic::x86_sse2_packuswb_128:
2701  case Intrinsic::x86_sse41_packusdw:
2702  case Intrinsic::x86_avx2_packusdw:
2703  case Intrinsic::x86_avx2_packuswb:
2704  case Intrinsic::x86_avx512_packusdw_512:
2705  case Intrinsic::x86_avx512_packuswb_512:
2706  if (Value *V = simplifyX86pack(*II, false))
2707  return replaceInstUsesWith(*II, V);
2708  break;
2709 
2710  case Intrinsic::x86_pclmulqdq: {
2711  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
2712  unsigned Imm = C->getZExtValue();
2713 
2714  bool MadeChange = false;
2715  Value *Arg0 = II->getArgOperand(0);
2716  Value *Arg1 = II->getArgOperand(1);
2717  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2718  APInt DemandedElts(VWidth, 0);
2719 
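  // The immediate selects which 64-bit half of each source is multiplied:
  // bit 0 picks element 0 or 1 of Arg0, bit 4 picks element 0 or 1 of Arg1.
  // Assigning 1 or 2 to DemandedElts sets the matching one-element bitmask.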
2720  APInt UndefElts1(VWidth, 0);
2721  DemandedElts = (Imm & 0x01) ? 2 : 1;
2722  if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts,
2723  UndefElts1)) {
2724  II->setArgOperand(0, V);
2725  MadeChange = true;
2726  }
2727 
2728  APInt UndefElts2(VWidth, 0);
2729  DemandedElts = (Imm & 0x10) ? 2 : 1;
2730  if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts,
2731  UndefElts2)) {
2732  II->setArgOperand(1, V);
2733  MadeChange = true;
2734  }
2735 
2736  // If both input elements are undef, the result is undef.
2737  if (UndefElts1[(Imm & 0x01) ? 1 : 0] ||
2738  UndefElts2[(Imm & 0x10) ? 1 : 0])
2739  return replaceInstUsesWith(*II,
2740  ConstantAggregateZero::get(II->getType()));
2741 
2742  if (MadeChange)
2743  return II;
2744  }
2745  break;
2746  }
2747 
2748  case Intrinsic::x86_sse41_insertps:
2749  if (Value *V = simplifyX86insertps(*II, Builder))
2750  return replaceInstUsesWith(*II, V);
2751  break;
2752 
2753  case Intrinsic::x86_sse4a_extrq: {
2754  Value *Op0 = II->getArgOperand(0);
2755  Value *Op1 = II->getArgOperand(1);
2756  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2757  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2758  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2759  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2760  VWidth1 == 16 && "Unexpected operand sizes");
2761 
2762  // See if we're dealing with constant values.
2763  Constant *C1 = dyn_cast<Constant>(Op1);
2764  ConstantInt *CILength =
2765  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
2766  : nullptr;
2767  ConstantInt *CIIndex =
2768  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2769  : nullptr;
2770 
2771  // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
2772  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2773  return replaceInstUsesWith(*II, V);
2774 
2775  // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
2776  // operands and the lowest 16-bits of the second.
2777  bool MadeChange = false;
2778  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2779  II->setArgOperand(0, V);
2780  MadeChange = true;
2781  }
2782  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2783  II->setArgOperand(1, V);
2784  MadeChange = true;
2785  }
2786  if (MadeChange)
2787  return II;
2788  break;
2789  }
2790 
2791  case Intrinsic::x86_sse4a_extrqi: {
2792  // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
2793  // bits of the lower 64-bits. The upper 64-bits are undefined.
2794  Value *Op0 = II->getArgOperand(0);
2795  unsigned VWidth = Op0->getType()->getVectorNumElements();
2796  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2797  "Unexpected operand size");
2798 
2799  // See if we're dealing with constant values.
2800  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
2801  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
2802 
2803  // Attempt to simplify to a constant or shuffle vector.
2804  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2805  return replaceInstUsesWith(*II, V);
2806 
2807  // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
2808  // operand.
2809  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2810  II->setArgOperand(0, V);
2811  return II;
2812  }
2813  break;
2814  }
2815 
2816  case Intrinsic::x86_sse4a_insertq: {
2817  Value *Op0 = II->getArgOperand(0);
2818  Value *Op1 = II->getArgOperand(1);
2819  unsigned VWidth = Op0->getType()->getVectorNumElements();
2820  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2821  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2822  Op1->getType()->getVectorNumElements() == 2 &&
2823  "Unexpected operand size");
2824 
2825  // See if we're dealing with constant values.
2826  Constant *C1 = dyn_cast<Constant>(Op1);
2827  ConstantInt *CI11 =
2828  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2829  : nullptr;
2830 
2831  // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
2832  if (CI11) {
2833  const APInt &V11 = CI11->getValue();
2834  APInt Len = V11.zextOrTrunc(6);
2835  APInt Idx = V11.lshr(8).zextOrTrunc(6);
2836  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2837  return replaceInstUsesWith(*II, V);
2838  }
2839 
2840  // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
2841  // operand.
2842  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2843  II->setArgOperand(0, V);
2844  return II;
2845  }
2846  break;
2847  }
2848 
2849  case Intrinsic::x86_sse4a_insertqi: {
2850  // INSERTQI: Extract lowest Length bits from lower half of second source and
2851  // insert over first source starting at Index bit. The upper 64-bits are
2852  // undefined.
2853  Value *Op0 = II->getArgOperand(0);
2854  Value *Op1 = II->getArgOperand(1);
2855  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2856  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2857  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2858  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2859  VWidth1 == 2 && "Unexpected operand sizes");
2860 
2861  // See if we're dealing with constant values.
2862  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
2863  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
2864 
2865  // Attempt to simplify to a constant or shuffle vector.
2866  if (CILength && CIIndex) {
2867  APInt Len = CILength->getValue().zextOrTrunc(6);
2868  APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2869  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2870  return replaceInstUsesWith(*II, V);
2871  }
2872 
2873  // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
2874  // operands.
2875  bool MadeChange = false;
2876  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2877  II->setArgOperand(0, V);
2878  MadeChange = true;
2879  }
2880  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2881  II->setArgOperand(1, V);
2882  MadeChange = true;
2883  }
2884  if (MadeChange)
2885  return II;
2886  break;
2887  }
2888 
2889  case Intrinsic::x86_sse41_pblendvb:
2890  case Intrinsic::x86_sse41_blendvps:
2891  case Intrinsic::x86_sse41_blendvpd:
2892  case Intrinsic::x86_avx_blendv_ps_256:
2893  case Intrinsic::x86_avx_blendv_pd_256:
2894  case Intrinsic::x86_avx2_pblendvb: {
2895  // Convert blendv* to vector selects if the mask is constant.
2896  // This optimization is convoluted because the intrinsic is defined as
2897  // getting a vector of floats or doubles for the ps and pd versions.
2898  // FIXME: That should be changed.
2899 
2900  Value *Op0 = II->getArgOperand(0);
2901  Value *Op1 = II->getArgOperand(1);
2902  Value *Mask = II->getArgOperand(2);
2903 
2904  // fold (blend A, A, Mask) -> A
2905  if (Op0 == Op1)
2906  return replaceInstUsesWith(CI, Op0);
2907 
2908  // Zero Mask - select 1st argument.
2909  if (isa<ConstantAggregateZero>(Mask))
2910  return replaceInstUsesWith(CI, Op0);
2911 
2912  // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
2913  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2914  Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
2915  return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
2916  }
2917  break;
2918  }
2919 
2920  case Intrinsic::x86_ssse3_pshuf_b_128:
2921  case Intrinsic::x86_avx2_pshuf_b:
2922  case Intrinsic::x86_avx512_pshuf_b_512:
2923  if (Value *V = simplifyX86pshufb(*II, Builder))
2924  return replaceInstUsesWith(*II, V);
2925  break;
2926 
2927  case Intrinsic::x86_avx_vpermilvar_ps:
2928  case Intrinsic::x86_avx_vpermilvar_ps_256:
2929  case Intrinsic::x86_avx512_vpermilvar_ps_512:
2930  case Intrinsic::x86_avx_vpermilvar_pd:
2931  case Intrinsic::x86_avx_vpermilvar_pd_256:
2932  case Intrinsic::x86_avx512_vpermilvar_pd_512:
2933  if (Value *V = simplifyX86vpermilvar(*II, Builder))
2934  return replaceInstUsesWith(*II, V);
2935  break;
2936 
2937  case Intrinsic::x86_avx2_permd:
2938  case Intrinsic::x86_avx2_permps:
2939  if (Value *V = simplifyX86vpermv(*II, Builder))
2940  return replaceInstUsesWith(*II, V);
2941  break;
2942 
2943  case Intrinsic::x86_avx512_mask_permvar_df_256:
2944  case Intrinsic::x86_avx512_mask_permvar_df_512:
2945  case Intrinsic::x86_avx512_mask_permvar_di_256:
2946  case Intrinsic::x86_avx512_mask_permvar_di_512:
2947  case Intrinsic::x86_avx512_mask_permvar_hi_128:
2948  case Intrinsic::x86_avx512_mask_permvar_hi_256:
2949  case Intrinsic::x86_avx512_mask_permvar_hi_512:
2950  case Intrinsic::x86_avx512_mask_permvar_qi_128:
2951  case Intrinsic::x86_avx512_mask_permvar_qi_256:
2952  case Intrinsic::x86_avx512_mask_permvar_qi_512:
2953  case Intrinsic::x86_avx512_mask_permvar_sf_256:
2954  case Intrinsic::x86_avx512_mask_permvar_sf_512:
2955  case Intrinsic::x86_avx512_mask_permvar_si_256:
2956  case Intrinsic::x86_avx512_mask_permvar_si_512:
2957  if (Value *V = simplifyX86vpermv(*II, Builder)) {
2958  // We simplified the permuting, now create a select for the masking.
2959  V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
2960  Builder);
2961  return replaceInstUsesWith(*II, V);
2962  }
2963  break;
2964 
2965  case Intrinsic::x86_avx_maskload_ps:
2966  case Intrinsic::x86_avx_maskload_pd:
2967  case Intrinsic::x86_avx_maskload_ps_256:
2968  case Intrinsic::x86_avx_maskload_pd_256:
2969  case Intrinsic::x86_avx2_maskload_d:
2970  case Intrinsic::x86_avx2_maskload_q:
2971  case Intrinsic::x86_avx2_maskload_d_256:
2972  case Intrinsic::x86_avx2_maskload_q_256:
2973  if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
2974  return I;
2975  break;
2976 
2977  case Intrinsic::x86_sse2_maskmov_dqu:
2978  case Intrinsic::x86_avx_maskstore_ps:
2979  case Intrinsic::x86_avx_maskstore_pd:
2980  case Intrinsic::x86_avx_maskstore_ps_256:
2981  case Intrinsic::x86_avx_maskstore_pd_256:
2982  case Intrinsic::x86_avx2_maskstore_d:
2983  case Intrinsic::x86_avx2_maskstore_q:
2984  case Intrinsic::x86_avx2_maskstore_d_256:
2985  case Intrinsic::x86_avx2_maskstore_q_256:
2986  if (simplifyX86MaskedStore(*II, *this))
2987  return nullptr;
2988  break;
2989 
2990  case Intrinsic::x86_xop_vpcomb:
2991  case Intrinsic::x86_xop_vpcomd:
2992  case Intrinsic::x86_xop_vpcomq:
2993  case Intrinsic::x86_xop_vpcomw:
2994  if (Value *V = simplifyX86vpcom(*II, Builder, true))
2995  return replaceInstUsesWith(*II, V);
2996  break;
2997 
2998  case Intrinsic::x86_xop_vpcomub:
2999  case Intrinsic::x86_xop_vpcomud:
3000  case Intrinsic::x86_xop_vpcomuq:
3001  case Intrinsic::x86_xop_vpcomuw:
3002  if (Value *V = simplifyX86vpcom(*II, Builder, false))
3003  return replaceInstUsesWith(*II, V);
3004  break;
3005 
3006  case Intrinsic::ppc_altivec_vperm:
3007  // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
3008  // Note that ppc_altivec_vperm has a big-endian bias, so when creating
3009  // a vectorshuffle for little endian, we must undo the transformation
3010  // performed on vec_perm in altivec.h. That is, we must complement
3011  // the permutation mask with respect to 31 and reverse the order of
3012  // V1 and V2.
3013  if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
3014  assert(Mask->getType()->getVectorNumElements() == 16 &&
3015  "Bad type for intrinsic!");
3016 
3017  // Check that all of the elements are integer constants or undefs.
3018  bool AllEltsOk = true;
3019  for (unsigned i = 0; i != 16; ++i) {
3020  Constant *Elt = Mask->getAggregateElement(i);
3021  if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
3022  AllEltsOk = false;
3023  break;
3024  }
3025  }
3026 
3027  if (AllEltsOk) {
3028  // Cast the input vectors to byte vectors.
3029  Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0),
3030  Mask->getType());
3031  Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1),
3032  Mask->getType());
3033  Value *Result = UndefValue::get(Op0->getType());
3034 
3035  // Only extract each element once.
3036  Value *ExtractedElts[32];
3037  memset(ExtractedElts, 0, sizeof(ExtractedElts));
3038 
3039  for (unsigned i = 0; i != 16; ++i) {
3040  if (isa<UndefValue>(Mask->getAggregateElement(i)))
3041  continue;
3042  unsigned Idx =
3043  cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
3044  Idx &= 31; // Match the hardware behavior.
3045  if (DL.isLittleEndian())
3046  Idx = 31 - Idx;
3047 
3048  if (!ExtractedElts[Idx]) {
3049  Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
3050  Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
3051  ExtractedElts[Idx] =
3052  Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
3053  Builder.getInt32(Idx&15));
3054  }
3055 
3056  // Insert this value into the result vector.
3057  Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx],
3058  Builder.getInt32(i));
3059  }
3060  return CastInst::Create(Instruction::BitCast, Result, CI.getType());
3061  }
3062  }
3063  break;
3064 
3065  case Intrinsic::arm_neon_vld1:
3066  case Intrinsic::arm_neon_vld2:
3067  case Intrinsic::arm_neon_vld3:
3068  case Intrinsic::arm_neon_vld4:
3069  case Intrinsic::arm_neon_vld2lane:
3070  case Intrinsic::arm_neon_vld3lane:
3071  case Intrinsic::arm_neon_vld4lane:
3072  case Intrinsic::arm_neon_vst1:
3073  case Intrinsic::arm_neon_vst2:
3074  case Intrinsic::arm_neon_vst3:
3075  case Intrinsic::arm_neon_vst4:
3076  case Intrinsic::arm_neon_vst2lane:
3077  case Intrinsic::arm_neon_vst3lane:
3078  case Intrinsic::arm_neon_vst4lane: {
3079  unsigned MemAlign =
3080  getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
3081  unsigned AlignArg = II->getNumArgOperands() - 1;
3082  ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
3083  if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
3084  II->setArgOperand(AlignArg,
3085  ConstantInt::get(Type::getInt32Ty(II->getContext()),
3086  MemAlign, false));
3087  return II;
3088  }
3089  break;
3090  }
3091 
3092  case Intrinsic::arm_neon_vmulls:
3093  case Intrinsic::arm_neon_vmullu:
3094  case Intrinsic::aarch64_neon_smull:
3095  case Intrinsic::aarch64_neon_umull: {
3096  Value *Arg0 = II->getArgOperand(0);
3097  Value *Arg1 = II->getArgOperand(1);
3098 
3099  // Handle mul by zero first:
3100  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3101  return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3102  }
3103 
3104  // Check for constant LHS & RHS - in this case we just simplify.
3105  bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
3106  II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
3107  VectorType *NewVT = cast<VectorType>(II->getType());
3108  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3109  if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3110  CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
3111  CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
3112 
3113  return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
3114  }
3115 
3116  // Couldn't simplify - canonicalize constant to the RHS.
3117  std::swap(Arg0, Arg1);
3118  }
3119 
3120  // Handle mul by one:
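  // Illustrative sketch (assuming the v4i16 -> v4i32 overload): a call such as
  //   call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x,
  //                                              <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  // becomes a plain sext of %x to <4 x i32> (zext for the unsigned variants).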
3121  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3122  if (ConstantInt *Splat =
3123  dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3124  if (Splat->isOne())
3125  return CastInst::CreateIntegerCast(Arg0, II->getType(),
3126  /*isSigned=*/!Zext);
3127 
3128  break;
3129  }
3130  case Intrinsic::amdgcn_rcp: {
3131  Value *Src = II->getArgOperand(0);
3132 
3133  // TODO: Move to ConstantFolding/InstSimplify?
3134  if (isa<UndefValue>(Src))
3135  return replaceInstUsesWith(CI, Src);
3136 
3137  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3138  const APFloat &ArgVal = C->getValueAPF();
3139  APFloat Val(ArgVal.getSemantics(), 1.0);
3140  APFloat::opStatus Status = Val.divide(ArgVal,
3141  APFloat::rmNearestTiesToEven);
3142  // Only do this if it was exact and therefore not dependent on the
3143  // rounding mode.
3144  if (Status == APFloat::opOK)
3145  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
3146  }
3147 
3148  break;
3149  }
3150  case Intrinsic::amdgcn_rsq: {
3151  Value *Src = II->getArgOperand(0);
3152 
3153  // TODO: Move to ConstantFolding/InstSimplify?
3154  if (isa<UndefValue>(Src))
3155  return replaceInstUsesWith(CI, Src);
3156  break;
3157  }
3158  case Intrinsic::amdgcn_frexp_mant:
3159  case Intrinsic::amdgcn_frexp_exp: {
3160  Value *Src = II->getArgOperand(0);
3161  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3162  int Exp;
3163  APFloat Significand = frexp(C->getValueAPF(), Exp,
3164  APFloat::rmNearestTiesToEven);
3165 
3166  if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
3167  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
3168  Significand));
3169  }
3170 
3171  // Match instruction special case behavior.
3172  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
3173  Exp = 0;
3174 
3175  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
3176  }
3177 
3178  if (isa<UndefValue>(Src))
3179  return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
3180 
3181  break;
3182  }
3183  case Intrinsic::amdgcn_class: {
3184  enum {
3185  S_NAN = 1 << 0, // Signaling NaN
3186  Q_NAN = 1 << 1, // Quiet NaN
3187  N_INFINITY = 1 << 2, // Negative infinity
3188  N_NORMAL = 1 << 3, // Negative normal
3189  N_SUBNORMAL = 1 << 4, // Negative subnormal
3190  N_ZERO = 1 << 5, // Negative zero
3191  P_ZERO = 1 << 6, // Positive zero
3192  P_SUBNORMAL = 1 << 7, // Positive subnormal
3193  P_NORMAL = 1 << 8, // Positive normal
3194  P_INFINITY = 1 << 9 // Positive infinity
3195  };
3196 
3197  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
3198  N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
3199 
3200  Value *Src0 = II->getArgOperand(0);
3201  Value *Src1 = II->getArgOperand(1);
3202  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
3203  if (!CMask) {
3204  if (isa<UndefValue>(Src0))
3205  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3206 
3207  if (isa<UndefValue>(Src1))
3208  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3209  break;
3210  }
3211 
3212  uint32_t Mask = CMask->getZExtValue();
3213 
3214  // If all tests are made, it doesn't matter what the value is.
3215  if ((Mask & FullMask) == FullMask)
3216  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
3217 
3218  if ((Mask & FullMask) == 0)
3219  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3220 
3221  if (Mask == (S_NAN | Q_NAN)) {
3222  // Equivalent of isnan. Replace with standard fcmp.
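  // Illustrative sketch (assuming the f32 overload; mask 3 == S_NAN | Q_NAN):
  //   %r = call i1 @llvm.amdgcn.class.f32(float %x, i32 3)
  // becomes
  //   %r = fcmp uno float %x, %x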
3223  Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0);
3224  FCmp->takeName(II);
3225  return replaceInstUsesWith(*II, FCmp);
3226  }
3227 
3228  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
3229  if (!CVal) {
3230  if (isa<UndefValue>(Src0))
3231  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3232 
3233  // Clamp mask to used bits
3234  if ((Mask & FullMask) != Mask) {
3235  CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(),
3236  { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
3237  );
3238 
3239  NewCall->takeName(II);
3240  return replaceInstUsesWith(*II, NewCall);
3241  }
3242 
3243  break;
3244  }
3245 
3246  const APFloat &Val = CVal->getValueAPF();
3247 
3248  bool Result =
3249  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
3250  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
3251  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
3252  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
3253  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
3254  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
3255  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
3256  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
3257  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
3258  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
3259 
3260  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
3261  }
3262  case Intrinsic::amdgcn_cvt_pkrtz: {
3263  Value *Src0 = II->getArgOperand(0);
3264  Value *Src1 = II->getArgOperand(1);
3265  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3266  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3267  const fltSemantics &HalfSem
3268  = II->getType()->getScalarType()->getFltSemantics();
3269  bool LosesInfo;
3270  APFloat Val0 = C0->getValueAPF();
3271  APFloat Val1 = C1->getValueAPF();
3272  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3273  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3274 
3275  Constant *Folded = ConstantVector::get({
3276  ConstantFP::get(II->getContext(), Val0),
3277  ConstantFP::get(II->getContext(), Val1) });
3278  return replaceInstUsesWith(*II, Folded);
3279  }
3280  }
3281 
3282  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3283  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3284 
3285  break;
3286  }
3287  case Intrinsic::amdgcn_ubfe:
3288  case Intrinsic::amdgcn_sbfe: {
3289  // Decompose simple cases into standard shifts.
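  // Illustrative sketch (i32 operands assumed): with constant offset 0 and
  // width 8, ubfe(%x, 0, 8) can be rewritten as
  //   %s = shl i32 %x, 24
  //   %r = lshr i32 %s, 24
  // (ashr instead of lshr for the signed sbfe form).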
3290  Value *Src = II->getArgOperand(0);
3291  if (isa<UndefValue>(Src))
3292  return replaceInstUsesWith(*II, Src);
3293 
3294  unsigned Width;
3295  Type *Ty = II->getType();
3296  unsigned IntSize = Ty->getIntegerBitWidth();
3297 
3298  ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
3299  if (CWidth) {
3300  Width = CWidth->getZExtValue();
3301  if ((Width & (IntSize - 1)) == 0)
3302  return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
3303 
3304  if (Width >= IntSize) {
3305  // Hardware ignores high bits, so remove those.
3306  II->setArgOperand(2, ConstantInt::get(CWidth->getType(),
3307  Width & (IntSize - 1)));
3308  return II;
3309  }
3310  }
3311 
3312  unsigned Offset;
3313  ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
3314  if (COffset) {
3315  Offset = COffset->getZExtValue();
3316  if (Offset >= IntSize) {
3317  II->setArgOperand(1, ConstantInt::get(COffset->getType(),
3318  Offset & (IntSize - 1)));
3319  return II;
3320  }
3321  }
3322 
3323  bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
3324 
3325  // TODO: Also emit sub if only width is constant.
3326  if (!CWidth && COffset && Offset == 0) {
3327  Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
3328  Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
3329  ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
3330 
3331  Value *Shl = Builder.CreateShl(Src, ShiftVal);
3332  Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
3333  : Builder.CreateLShr(Shl, ShiftVal);
3334  RightShift->takeName(II);
3335  return replaceInstUsesWith(*II, RightShift);
3336  }
3337 
3338  if (!CWidth || !COffset)
3339  break;
3340 
3341  // TODO: This allows folding to undef when the hardware has specific
3342  // behavior?
3343  if (Offset + Width < IntSize) {
3344  Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width);
3345  Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width)
3346  : Builder.CreateLShr(Shl, IntSize - Width);
3347  RightShift->takeName(II);
3348  return replaceInstUsesWith(*II, RightShift);
3349  }
3350 
3351  Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset)
3352  : Builder.CreateLShr(Src, Offset);
3353 
3354  RightShift->takeName(II);
3355  return replaceInstUsesWith(*II, RightShift);
3356  }
3357  case Intrinsic::amdgcn_exp:
3358  case Intrinsic::amdgcn_exp_compr: {
3359  ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
3360  if (!En) // Illegal.
3361  break;
3362 
3363  unsigned EnBits = En->getZExtValue();
3364  if (EnBits == 0xf)
3365  break; // All inputs enabled.
3366 
3367  bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
3368  bool Changed = false;
3369  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
3370  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
3371  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
3372  Value *Src = II->getArgOperand(I + 2);
3373  if (!isa<UndefValue>(Src)) {
3374  II->setArgOperand(I + 2, UndefValue::get(Src->getType()));
3375  Changed = true;
3376  }
3377  }
3378  }
3379 
3380  if (Changed)
3381  return II;
3382 
3383  break;
3384  }
3385  case Intrinsic::amdgcn_fmed3: {
3386  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
3387  // for the shader.
3388 
3389  Value *Src0 = II->getArgOperand(0);
3390  Value *Src1 = II->getArgOperand(1);
3391  Value *Src2 = II->getArgOperand(2);
3392 
3393  bool Swap = false;
3394  // Canonicalize constants to RHS operands.
3395  //
3396  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
3397  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3398  std::swap(Src0, Src1);
3399  Swap = true;
3400  }
3401 
3402  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
3403  std::swap(Src1, Src2);
3404  Swap = true;
3405  }
3406 
3407  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3408  std::swap(Src0, Src1);
3409  Swap = true;
3410  }
3411 
3412  if (Swap) {
3413  II->setArgOperand(0, Src0);
3414  II->setArgOperand(1, Src1);
3415  II->setArgOperand(2, Src2);
3416  return II;
3417  }
3418 
3419  if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
3420  CallInst *NewCall = Builder.CreateMinNum(Src0, Src1);
3421  NewCall->copyFastMathFlags(II);
3422  NewCall->takeName(II);
3423  return replaceInstUsesWith(*II, NewCall);
3424  }
3425 
3426  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3427  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3428  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
3429  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
3430  C2->getValueAPF());
3431  return replaceInstUsesWith(*II,
3432  ConstantFP::get(Builder.getContext(), Result));
3433  }
3434  }
3435  }
3436 
3437  break;
3438  }
3439  case Intrinsic::amdgcn_icmp:
3440  case Intrinsic::amdgcn_fcmp: {
3441  const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
3442  if (!CC)
3443  break;
3444 
3445  // Guard against invalid arguments.
3446  int64_t CCVal = CC->getZExtValue();
3447  bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
3448  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
3449  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
3450  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
3451  CCVal > CmpInst::LAST_FCMP_PREDICATE)))
3452  break;
3453 
3454  Value *Src0 = II->getArgOperand(0);
3455  Value *Src1 = II->getArgOperand(1);
3456 
3457  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
3458  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
3459  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
3460  if (CCmp->isNullValue()) {
3461  return replaceInstUsesWith(
3462  *II, ConstantExpr::getSExt(CCmp, II->getType()));
3463  }
3464 
3465  // The result of V_ICMP/V_FCMP assembly instructions (which this
3466  // intrinsic exposes) is one bit per thread, masked with the EXEC
3467  // register (which contains the bitmask of live threads). So a
3468  // comparison that always returns true is the same as a read of the
3469  // EXEC register.
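  // Illustrative sketch (i32 operands assumed; 33 is ICMP_NE here):
  //   call i64 @llvm.amdgcn.icmp.i32(i32 1, i32 0, i32 33)
  // is true in every active lane, so it can become
  //   call i64 @llvm.read_register.i64(metadata !"exec")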
3470  Value *NewF = Intrinsic::getDeclaration(
3471  II->getModule(), Intrinsic::read_register, II->getType());
3472  Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
3473  MDNode *MD = MDNode::get(II->getContext(), MDArgs);
3474  Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
3475  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3476  NewCall->addAttribute(AttributeList::FunctionIndex,
3477  Attribute::Convergent);
3478  NewCall->takeName(II);
3479  return replaceInstUsesWith(*II, NewCall);
3480  }
3481 
3482  // Canonicalize constants to RHS.
3483  CmpInst::Predicate SwapPred
3484  = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
3485  II->setArgOperand(0, Src1);
3486  II->setArgOperand(1, Src0);
3487  II->setArgOperand(2, ConstantInt::get(CC->getType(),
3488  static_cast<int>(SwapPred)));
3489  return II;
3490  }
3491 
3492  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
3493  break;
3494 
3495  // Canonicalize compare eq with true value to compare != 0
3496  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
3497  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
3498  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
3499  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
3500  Value *ExtSrc;
3501  if (CCVal == CmpInst::ICMP_EQ &&
3502  ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
3503  (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
3504  ExtSrc->getType()->isIntegerTy(1)) {
3505  II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType()));
3506  II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
3507  return II;
3508  }
3509 
3510  CmpInst::Predicate SrcPred;
3511  Value *SrcLHS;
3512  Value *SrcRHS;
3513 
3514  // Fold compare eq/ne with 0 from a compare result as the predicate to the
3515  // intrinsic. The typical use is a wave vote function in the library, which
3516  // will be fed from a user code condition compared with 0. Fold in the
3517  // redundant compare.
3518 
3519  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
3520  // -> llvm.amdgcn.[if]cmp(a, b, pred)
3521  //
3522  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
3523  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
3524  if (match(Src1, m_Zero()) &&
3525  match(Src0,
3526  m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
3527  if (CCVal == CmpInst::ICMP_EQ)
3528  SrcPred = CmpInst::getInversePredicate(SrcPred);
3529 
3530  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
3531  Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
3532 
3533  Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
3534  SrcLHS->getType());
3535  Value *Args[] = { SrcLHS, SrcRHS,
3536  ConstantInt::get(CC->getType(), SrcPred) };
3537  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3538  NewCall->takeName(II);
3539  return replaceInstUsesWith(*II, NewCall);
3540  }
3541 
3542  break;
3543  }
3544  case Intrinsic::amdgcn_wqm_vote: {
3545  // wqm_vote is identity when the argument is constant.
3546  if (!isa<Constant>(II->getArgOperand(0)))
3547  break;
3548 
3549  return replaceInstUsesWith(*II, II->getArgOperand(0));
3550  }
3551  case Intrinsic::amdgcn_kill: {
3552  const ConstantInt *C = dyn_cast<ConstantInt>(II->getArgOperand(0));
3553  if (!C || !C->getZExtValue())
3554  break;
3555 
3556  // amdgcn.kill(i1 1) is a no-op
3557  return eraseInstFromFunction(CI);
3558  }
3559  case Intrinsic::stackrestore: {
3560  // If the save is right next to the restore, remove the restore. This can
3561  // happen when variable allocas are DCE'd.
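  // Illustrative sketch (not from the original source):
  //   %sp = call i8* @llvm.stacksave()
  //   call void @llvm.stackrestore(i8* %sp)
  // With nothing in between, the restore has no effect and can be erased.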
3562  if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3563  if (SS->getIntrinsicID() == Intrinsic::stacksave) {
3564  if (&*++SS->getIterator() == II)
3565  return eraseInstFromFunction(CI);
3566  }
3567  }
3568 
3569  // Scan down this block to see if there is another stack restore in the
3570  // same block without an intervening call/alloca.
3571  BasicBlock::iterator BI(II);
3572  TerminatorInst *TI = II->getParent()->getTerminator();
3573  bool CannotRemove = false;
3574  for (++BI; &*BI != TI; ++BI) {
3575  if (isa<AllocaInst>(BI)) {
3576  CannotRemove = true;
3577  break;
3578  }
3579  if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
3580  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
3581  // If there is a stackrestore below this one, remove this one.
3582  if (II->getIntrinsicID() == Intrinsic::stackrestore)
3583  return eraseInstFromFunction(CI);
3584 
3585  // Bail if we cross over an intrinsic with side effects, such as
3586  // llvm.stacksave, llvm.read_register, or llvm.setjmp.
3587  if (II->mayHaveSideEffects()) {
3588  CannotRemove = true;
3589  break;
3590  }
3591  } else {
3592  // If we found a non-intrinsic call, we can't remove the stack
3593  // restore.
3594  CannotRemove = true;
3595  break;
3596  }
3597  }
3598  }
3599 
3600  // If the stack restore is in a return, resume, or unwind block and if there
3601  // are no allocas or calls between the restore and the return, nuke the
3602  // restore.
3603  if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3604  return eraseInstFromFunction(CI);
3605  break;
3606  }
3607  case Intrinsic::lifetime_start:
3608  // Asan needs to poison memory to detect invalid access which is possible
3609  // even for empty lifetime range.
3610  if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
3611  break;
3612 
3613  if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
3614  Intrinsic::lifetime_end, *this))
3615  return nullptr;
3616  break;
3617  case Intrinsic::assume: {
3618  Value *IIOperand = II->getArgOperand(0);
3619  // Remove an assume if it is immediately followed by an identical assume.
3620  if (match(II->getNextNode(),
3621  m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
3622  return eraseInstFromFunction(CI);
3623 
3624  // Canonicalize assume(a && b) -> assume(a); assume(b);
3625  // Note: New assumption intrinsics created here are registered by
3626  // the InstCombineIRInserter object.
3627  Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
3628  if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
3629  Builder.CreateCall(AssumeIntrinsic, A, II->getName());
3630  Builder.CreateCall(AssumeIntrinsic, B, II->getName());
3631  return eraseInstFromFunction(*II);
3632  }
3633  // assume(!(a || b)) -> assume(!a); assume(!b);
3634  if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
3635  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
3636  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
3637  return eraseInstFromFunction(*II);
3638  }
3639 
3640  // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3641  // (if assume is valid at the load)
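  // Illustrative sketch (pointer type assumed):
  //   %p = load i32*, i32** %q
  //   %c = icmp ne i32* %p, null
  //   call void @llvm.assume(i1 %c)
  // The load is tagged with !nonnull metadata and the assume is erased.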
3642  CmpInst::Predicate Pred;
3643  Instruction *LHS;
3644  if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
3645  Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
3646  LHS->getType()->isPointerTy() &&
3647  isValidAssumeForContext(II, LHS, &DT)) {
3648  MDNode *MD = MDNode::get(II->getContext(), None);
3649  LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3650  return eraseInstFromFunction(*II);
3651 
3652  // TODO: apply nonnull return attributes to calls and invokes
3653  // TODO: apply range metadata for range check patterns?
3654  }
3655 
3656  // If there is a dominating assume with the same condition as this one,
3657  // then this one is redundant, and should be removed.
3658  KnownBits Known(1);
3659  computeKnownBits(IIOperand, Known, 0, II);
3660  if (Known.isAllOnes())
3661  return eraseInstFromFunction(*II);
3662 
3663  // Update the cache of affected values for this assumption (we might be
3664  // here because we just simplified the condition).
3665  AC.updateAffectedValues(II);
3666  break;
3667  }
3668  case Intrinsic::experimental_gc_relocate: {
3669  // Translate facts known about a pointer before relocating into
3670  // facts about the relocate value, while being careful to
3671  // preserve relocation semantics.
3672  Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
3673 
3674  // Remove the relocation if unused; note that this check is required
3675  // to prevent the cases below from looping forever.
3676  if (II->use_empty())
3677  return eraseInstFromFunction(*II);
3678 
3679  // Undef is undef, even after relocation.
3680  // TODO: provide a hook for this in GCStrategy. This is clearly legal for
3681  // most practical collectors, but there was discussion in the review thread
3682  // about whether it was legal for all possible collectors.
3683  if (isa<UndefValue>(DerivedPtr))
3684  // Use undef of gc_relocate's type to replace it.
3685  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3686 
3687  if (auto *PT = dyn_cast<PointerType>(II->getType())) {
3688  // The relocation of null will be null for most any collector.
3689  // TODO: provide a hook for this in GCStrategy. There might be some
3690  // weird collector this property does not hold for.
3691  if (isa<ConstantPointerNull>(DerivedPtr))
3692  // Use null-pointer of gc_relocate's type to replace it.
3693  return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
3694 
3695  // isKnownNonNull -> nonnull attribute
3696  if (isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT))
3697  II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
3698  }
3699 
3700  // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3701  // Canonicalize on the type from the uses to the defs
3702 
3703  // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3704  break;
3705  }
3706 
3707  case Intrinsic::experimental_guard: {
3708  // Is this guard followed by another guard?
3709  Instruction *NextInst = II->getNextNode();
3710  Value *NextCond = nullptr;
3711  if (match(NextInst,
3712  m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3713  Value *CurrCond = II->getArgOperand(0);
3714 
3715  // Remove a guard that is immediately preceded by an identical guard.
3716  if (CurrCond == NextCond)
3717  return eraseInstFromFunction(*NextInst);
3718 
3719  // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3720  II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
3721  return eraseInstFromFunction(*NextInst);
3722  }
3723  break;
3724  }
3725  }
3726  return visitCallSite(II);
3727 }
3728 
3729 // Fence instruction simplification
3730 Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
3731  // Remove identical consecutive fences.
3732  if (auto *NFI = dyn_cast<FenceInst>(FI.getNextNode()))
3733  if (FI.isIdenticalTo(NFI))
3734  return eraseInstFromFunction(FI);
3735  return nullptr;
3736 }
3737 
3738 // InvokeInst simplification
3739 Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
3740  return visitCallSite(&II);
3741 }
3742 
3743 /// If this cast does not affect the value passed through the varargs area, we
3744 /// can eliminate the use of the cast.
3745 static bool isSafeToEliminateVarargsCast(CallSite CS,
3746  const DataLayout &DL,
3747  const CastInst *const CI,
3748  const int ix) {
3749  if (!CI->isLosslessCast())
3750  return false;
3751 
3752  // If this is a GC intrinsic, avoid munging types. We need types for
3753  // statepoint reconstruction in SelectionDAG.
3754  // TODO: This is probably something which should be expanded to all
3755  // intrinsics since the entire point of intrinsics is that
3756  // they are understandable by the optimizer.
3757  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
3758  return false;
3759 
3760  // The size of ByVal or InAlloca arguments is derived from the type, so we
3761  // can't change to a type with a different size. If the size were
3762  // passed explicitly we could avoid this check.
3763  if (!CS.isByValOrInAllocaArgument(ix))
3764  return true;
3765 
3766  Type* SrcTy =
3767  cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
3768  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
3769  if (!SrcTy->isSized() || !DstTy->isSized())
3770  return false;
3771  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
3772  return false;
3773  return true;
3774 }
3775 
3776 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
3777  if (!CI->getCalledFunction()) return nullptr;
3778 
3779  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
3780  replaceInstUsesWith(*From, With);
3781  };
3782  LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW);
3783  if (Value *With = Simplifier.optimizeCall(CI)) {
3784  ++NumSimplified;
3785  return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
3786  }
3787 
3788  return nullptr;
3789 }
3790 
3791 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
3792  // Strip off at most one level of pointer casts, looking for an alloca. This
3793  // is good enough in practice and simpler than handling any number of casts.
3794  Value *Underlying = TrampMem->stripPointerCasts();
3795  if (Underlying != TrampMem &&
3796  (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
3797  return nullptr;
3798  if (!isa<AllocaInst>(Underlying))
3799  return nullptr;
3800 
3801  IntrinsicInst *InitTrampoline = nullptr;
3802  for (User *U : TrampMem->users()) {
3803  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
3804  if (!II)
3805  return nullptr;
3806  if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
3807  if (InitTrampoline)
3808  // More than one init_trampoline writes to this value. Give up.
3809  return nullptr;
3810  InitTrampoline = II;
3811  continue;
3812  }
3813  if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
3814  // Allow any number of calls to adjust.trampoline.
3815  continue;
3816  return nullptr;
3817  }
3818 
3819  // No call to init.trampoline found.
3820  if (!InitTrampoline)
3821  return nullptr;
3822 
3823  // Check that the alloca is being used in the expected way.
3824  if (InitTrampoline->getOperand(0) != TrampMem)
3825  return nullptr;
3826 
3827  return InitTrampoline;
3828 }
3829 
3830 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
3831  Value *TrampMem) {
3832  // Visit all the previous instructions in the basic block, and try to find an
3833  // init.trampoline which has a direct path to the adjust.trampoline.
3834  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
3835  E = AdjustTramp->getParent()->begin();
3836  I != E;) {
3837  Instruction *Inst = &*--I;
3838  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
3839  if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
3840  II->getOperand(0) == TrampMem)
3841  return II;
3842  if (Inst->mayWriteToMemory())
3843  return nullptr;
3844  }
3845  return nullptr;
3846 }
3847 
3848 // Given a call to llvm.adjust.trampoline, find and return the corresponding
3849 // call to llvm.init.trampoline if the call to the trampoline can be optimized
3850 // to a direct call to a function. Otherwise return NULL.
3851 static IntrinsicInst *findInitTrampoline(Value *Callee) {
3852  Callee = Callee->stripPointerCasts();
3853  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
3854  if (!AdjustTramp ||
3855  AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
3856  return nullptr;
3857 
3858  Value *TrampMem = AdjustTramp->getOperand(0);
3859 
3860  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
3861  return IT;
3862  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
3863  return IT;
3864  return nullptr;
3865 }
3866 
3867 /// Improvements for call and invoke instructions.
3868 Instruction *InstCombiner::visitCallSite(CallSite CS) {
3869  if (isAllocLikeFn(CS.getInstruction(), &TLI))
3870  return visitAllocSite(*CS.getInstruction());
3871 
3872  bool Changed = false;
3873 
3874  // Mark any parameters that are known to be non-null with the nonnull
3875  // attribute. This is helpful for inlining calls to functions with null
3876  // checks on their arguments.
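  // Illustrative sketch: if %p can be proven non-null at this call site,
  //   call void @use(i8* %p)
  // becomes
  //   call void @use(i8* nonnull %p)
  // (@use is a hypothetical callee used only for illustration).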
3877  SmallVector<unsigned, 4> ArgNos;
3878  unsigned ArgNo = 0;
3879 
3880  for (Value *V : CS.args()) {
3881  if (V->getType()->isPointerTy() &&
3882  !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
3883  isKnownNonZero(V, DL, 0, &AC, CS.getInstruction(), &DT))
3884  ArgNos.push_back(ArgNo);
3885  ArgNo++;
3886  }
3887 
3888  assert(ArgNo == CS.arg_size() && "sanity check");
3889 
3890  if (!ArgNos.empty()) {
3891  AttributeList AS = CS.getAttributes();
3892  LLVMContext &Ctx = CS.getInstruction()->getContext();
3893  AS = AS.addParamAttribute(Ctx, ArgNos,
3894  Attribute::get(Ctx, Attribute::NonNull));
3895  CS.setAttributes(AS);
3896  Changed = true;
3897  }
3898 
3899  // If the callee is a pointer to a function, attempt to move any casts to the
3900  // arguments of the call/invoke.
3901  Value *Callee = CS.getCalledValue();
3902  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
3903  return nullptr;
3904 
3905  if (Function *CalleeF = dyn_cast<Function>(Callee)) {
3906  // Remove the convergent attr on calls when the callee is not convergent.
3907  if (CS.isConvergent() && !CalleeF->isConvergent() &&
3908  !CalleeF->isIntrinsic()) {
3909  DEBUG(dbgs() << "Removing convergent attr from instr "
3910  << CS.getInstruction() << "\n");
3911  CS.setNotConvergent();
3912  return CS.getInstruction();
3913  }
3914 
3915  // If the call and callee calling conventions don't match, this call must
3916  // be unreachable, as the call is undefined.
3917  if (CalleeF->getCallingConv() != CS.getCallingConv() &&
3918  // Only do this for calls to a function with a body. A prototype may
3919  // not actually end up matching the implementation's calling conv for a
3920  // variety of reasons (e.g. it may be written in assembly).
3921  !CalleeF->isDeclaration()) {
3922  Instruction *OldCall = CS.getInstruction();
3923  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
3924  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
3925  OldCall);
3926  // If OldCall does not return void then replaceAllUsesWith undef.
3927  // This allows ValueHandlers and custom metadata to adjust themselves.
3928  if (!OldCall->getType()->isVoidTy())
3929  replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
3930  if (isa<CallInst>(OldCall))
3931  return eraseInstFromFunction(*OldCall);
3932 
3933  // We cannot remove an invoke, because it would change the CFG, just
3934  // change the callee to a null pointer.
3935  cast<InvokeInst>(OldCall)->setCalledFunction(
3936  Constant::getNullValue(CalleeF->getType()));
3937  return nullptr;
3938  }
3939  }
3940 
3941  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
3942  // If CS does not return void then replaceAllUsesWith undef.
3943  // This allows ValueHandlers and custom metadata to adjust themselves.
3944  if (!CS.getInstruction()->getType()->isVoidTy())
3945  replaceInstUsesWith(*CS.getInstruction(),
3946  UndefValue::get(CS.getInstruction()->getType()));
3947 
3948  if (isa<InvokeInst>(CS.getInstruction())) {
3949  // Can't remove an invoke because we cannot change the CFG.
3950  return nullptr;
3951  }
3952 
3953  // This instruction is not reachable, just remove it. We insert a store to
3954  // undef so that we know that this code is not reachable, despite the fact
3955  // that we can't modify the CFG here.
3956  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
3957  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
3958  CS.getInstruction());
3959 
3960  return eraseInstFromFunction(*CS.getInstruction());
3961  }
3962 
3963  if (IntrinsicInst *II = findInitTrampoline(Callee))
3964  return transformCallThroughTrampoline(CS, II);
3965 
3966  PointerType *PTy = cast<PointerType>(Callee->getType());
3967  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
3968  if (FTy->isVarArg()) {
3969  int ix = FTy->getNumParams();
3970  // See if we can optimize any arguments passed through the varargs area of
3971  // the call.
3972  for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
3973  E = CS.arg_end(); I != E; ++I, ++ix) {
3974  CastInst *CI = dyn_cast<CastInst>(*I);
3975  if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
3976  *I = CI->getOperand(0);
3977  Changed = true;
3978  }
3979  }
3980  }
3981 
3982  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
3983  // Inline asm calls cannot throw - mark them 'nounwind'.
3984  CS.setDoesNotThrow();
3985  Changed = true;
3986  }
3987 
3988  // Try to optimize the call if possible; we require DataLayout for most of
3989  // this. None of these calls are seen as possibly dead so go ahead and
3990  // delete the instruction now.
3991  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
3992  Instruction *I = tryOptimizeCall(CI);
3993  // If we changed something return the result, etc. Otherwise let
3994  // the fallthrough check.
3995  if (I) return eraseInstFromFunction(*I);
3996  }
3997 
3998  return Changed ? CS.getInstruction() : nullptr;
3999 }
4000 
4001 /// If the callee is a constexpr cast of a function, attempt to move the cast to
4002 /// the arguments of the call/invoke.
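/// Illustrative sketch (hypothetical @f; pointer-to-pointer casts are no-op
/// castable):
///   call i32 bitcast (i32 (i8*)* @f to i32 (i64*)*)(i64* %p)
/// can become
///   %q = bitcast i64* %p to i8*
///   call i32 @f(i8* %q)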
4003 bool InstCombiner::transformConstExprCastCall(CallSite CS) {
4004  Function *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
4005  if (!Callee)
4006  return false;
4007 
4008  // The prototype of a thunk is a lie. Don't directly call such a function.
4009  if (Callee->hasFnAttribute("thunk"))
4010  return false;
4011 
4012  Instruction *Caller = CS.getInstruction();
4013  const AttributeList &CallerPAL = CS.getAttributes();
4014 
4015  // Okay, this is a cast from a function to a different type. Unless doing so
4016  // would cause a type conversion of one of our arguments, change this call to
4017  // be a direct call with arguments cast to the appropriate types.
4018  FunctionType *FT = Callee->getFunctionType();
4019  Type *OldRetTy = Caller->getType();
4020  Type *NewRetTy = FT->getReturnType();
4021 
4022  // Check to see if we are changing the return type...
4023  if (OldRetTy != NewRetTy) {
4024 
4025  if (NewRetTy->isStructTy())
4026  return false; // TODO: Handle multiple return values.
4027 
4028  if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4029  if (Callee->isDeclaration())
4030  return false; // Cannot transform this return value.
4031 
4032  if (!Caller->use_empty() &&
4033  // void -> non-void is handled specially
4034  !NewRetTy->isVoidTy())
4035  return false; // Cannot transform this return value.
4036  }
4037 
4038  if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4039  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4040  if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
4041  return false; // Attribute not compatible with transformed value.
4042  }
4043 
4044  // If the callsite is an invoke instruction, and the return value is used by
4045  // a PHI node in a successor, we cannot change the return type of the call
4046  // because there is no place to put the cast instruction (without breaking
4047  // the critical edge). Bail out in this case.
4048  if (!Caller->use_empty())
4049  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
4050  for (User *U : II->users())
4051  if (PHINode *PN = dyn_cast<PHINode>(U))
4052  if (PN->getParent() == II->getNormalDest() ||
4053  PN->getParent() == II->getUnwindDest())
4054  return false;
4055  }
4056 
4057  unsigned NumActualArgs = CS.arg_size();
4058  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4059 
4060  // Prevent us turning:
4061  // declare void @takes_i32_inalloca(i32* inalloca)
4062  // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
4063  //
4064  // into:
4065  // call void @takes_i32_inalloca(i32* null)
4066  //
4067  // Similarly, avoid folding away bitcasts of byval calls.
4068  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
4069  Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
4070  return false;
4071 
4072  CallSite::arg_iterator AI = CS.arg_begin();
4073  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4074  Type *ParamTy = FT->getParamType(i);
4075  Type *ActTy = (*AI)->getType();
4076 
4077  if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
4078  return false; // Cannot transform this parameter value.
4079 
4080  if (AttrBuilder(CallerPAL.getParamAttributes(i))
4081  .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
4082  return false; // Attribute not compatible with transformed value.
4083 
4084  if (CS.isInAllocaArgument(i))
4085  return false; // Cannot transform to and from inalloca.
4086 
4087  // If the parameter is passed as a byval argument, then we have to have a
4088  // sized type and the sized type has to have the same size as the old type.
4089  if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
4090  PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
4091  if (!ParamPTy || !ParamPTy->getElementType()->isSized())
4092  return false;
4093 
4094  Type *CurElTy = ActTy->getPointerElementType();
4095  if (DL.getTypeAllocSize(CurElTy) !=
4096  DL.getTypeAllocSize(ParamPTy->getElementType()))
4097  return false;
4098  }
4099  }
4100 
4101  if (Callee->isDeclaration()) {
4102  // Do not delete arguments unless we have a function body.
4103  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
4104  return false;
4105 
4106  // If the callee is just a declaration, don't change the varargsness of the
4107  // call. We don't want to introduce a varargs call where one doesn't
4108  // already exist.
4109  PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
4110  if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
4111  return false;
4112 
4113  // If both the callee and the cast type are varargs, we still have to make
4114  // sure the number of fixed parameters is the same or we have the same
4115  // ABI issues as if we introduce a varargs call.
4116  if (FT->isVarArg() &&
4117  cast<FunctionType>(APTy->getElementType())->isVarArg() &&
4118  FT->getNumParams() !=
4119  cast<FunctionType>(APTy->getElementType())->getNumParams())
4120  return false;
4121  }
4122 
4123  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4124  !CallerPAL.isEmpty()) {
4125  // In this case we have more arguments than the new function type, but we
4126  // won't be dropping them. Check that these extra arguments have attributes
4127  // that are compatible with being a vararg call argument.
4128  unsigned SRetIdx;
4129  if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
4130  SRetIdx > FT->getNumParams())
4131  return false;
4132  }
4133 
4134  // Okay, we decided that this is a safe thing to do: go ahead and start
4135  // inserting cast instructions as necessary.
4136  SmallVector<Value *, 8> Args;
4137  SmallVector<AttributeSet, 8> ArgAttrs;
4138  Args.reserve(NumActualArgs);
4139  ArgAttrs.reserve(NumActualArgs);
4140 
4141  // Get any return attributes.
4142  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4143 
4144  // If the return value is not being used, the type may not be compatible
4145  // with the existing attributes. Wipe out any problematic attributes.
4146  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
4147 
4148  AI = CS.arg_begin();
4149  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4150  Type *ParamTy = FT->getParamType(i);
4151 
4152  Value *NewArg = *AI;
4153  if ((*AI)->getType() != ParamTy)
4154  NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
4155  Args.push_back(NewArg);
4156 
4157  // Add any parameter attributes.
4158  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4159  }
4160 
4161  // If the function takes more arguments than the call was taking, add them
4162  // now.
4163  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
4164  Args.push_back(Constant::getNullValue(FT->getParamType(i)));
4165  ArgAttrs.push_back(AttributeSet());
4166  }
4167 
4168  // If we are removing arguments to the function, emit an obnoxious warning.
4169  if (FT->getNumParams() < NumActualArgs) {
4170  // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4171  if (FT->isVarArg()) {
4172  // Add all of the arguments in their promoted form to the arg list.
4173  for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4174  Type *PTy = getPromotedType((*AI)->getType());
4175  Value *NewArg = *AI;
4176  if (PTy != (*AI)->getType()) {
4177  // Must promote to pass through va_arg area!
4178  Instruction::CastOps opcode =
4179  CastInst::getCastOpcode(*AI, false, PTy, false);
4180  NewArg = Builder.CreateCast(opcode, *AI, PTy);
4181  }
4182  Args.push_back(NewArg);
4183 
4184  // Add any parameter attributes.
4185  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4186  }
4187  }
4188  }
4189 
4190  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
4191 
4192  if (NewRetTy->isVoidTy())
4193  Caller->setName(""); // Void type should not have a name.
4194 
4195  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
4196  "missing argument attributes");
4197  LLVMContext &Ctx = Callee->getContext();
4198  AttributeList NewCallerPAL = AttributeList::get(
4199  Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
4200 
4201  SmallVector<OperandBundleDef, 1> OpBundles;
4202  CS.getOperandBundlesAsDefs(OpBundles);
4203 
4204  CallSite NewCS;
4205  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4206  NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
4207  II->getUnwindDest(), Args, OpBundles);
4208  } else {
4209  NewCS = Builder.CreateCall(Callee, Args, OpBundles);
4210  cast<CallInst>(NewCS.getInstruction())
4211  ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
4212  }
4213  NewCS->takeName(Caller);
4214  NewCS.setCallingConv(CS.getCallingConv());
4215  NewCS.setAttributes(NewCallerPAL);
4216 
4217  // Preserve the weight metadata for the new call instruction. The metadata
4218  // is used by SamplePGO to check callsite's hotness.
4219  uint64_t W;
4220  if (Caller->extractProfTotalWeight(W))
4221  NewCS->setProfWeight(W);
4222 
4223  // Insert a cast of the return type as necessary.
4224  Instruction *NC = NewCS.getInstruction();
4225  Value *NV = NC;
4226  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4227  if (!NV->getType()->isVoidTy()) {
4228  NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
4229  NC->setDebugLoc(Caller->getDebugLoc());
4230 
4231  // If this is an invoke instruction, we should insert it after the first
4232  // non-PHI instruction in the normal successor block.
4233  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4234  BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
4235  InsertNewInstBefore(NC, *I);
4236  } else {
4237  // Otherwise, it's a call, just insert cast right after the call.
4238  InsertNewInstBefore(NC, *Caller);
4239  }
4240  Worklist.AddUsersToWorkList(*Caller);
4241  } else {
4242  NV = UndefValue::get(Caller->getType());
4243  }
4244  }
4245 
4246  if (!Caller->use_empty())
4247  replaceInstUsesWith(*Caller, NV);
4248  else if (Caller->hasValueHandle()) {
4249  if (OldRetTy == NV->getType())
4250  ValueHandleBase::ValueIsRAUWd(Caller, NV);
4251  else
4252  // We cannot call ValueIsRAUWd with a different type, and the
4253  // actual tracked value will disappear.
4254  ValueHandleBase::ValueIsDeleted(Caller);
4255  }
4256 
4257  eraseInstFromFunction(*Caller);
4258  return true;
4259 }
4260 
4261 /// Turn a call to a function created by init_trampoline / adjust_trampoline
4262 /// intrinsic pair into a direct call to the underlying function.
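/// Illustrative sketch (hypothetical names): after
///   call void @llvm.init.trampoline(i8* %tramp, i8* %impl_addr, i8* %chain)
///   %fp = call i8* @llvm.adjust.trampoline(i8* %tramp)
/// a call made through %fp can be rewritten as a direct call to the underlying
/// function, with %chain passed in the parameter marked 'nest'.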
4263 Instruction *
4264 InstCombiner::transformCallThroughTrampoline(CallSite CS,
4265  IntrinsicInst *Tramp) {
4266  Value *Callee = CS.getCalledValue();
4267  PointerType *PTy = cast<PointerType>(Callee->getType());
4268  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4269  const AttributeList &Attrs = CS.getAttributes();
4270 
4271  // If the call already has the 'nest' attribute somewhere then give up -
4272  // otherwise 'nest' would occur twice after splicing in the chain.
4273  if (Attrs.hasAttrSomewhere(Attribute::Nest))
4274  return nullptr;
4275 
4276  assert(Tramp &&
4277  "transformCallThroughTrampoline called with incorrect CallSite.");
4278 
4279  Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
4280  FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
4281 
4282  AttributeList NestAttrs = NestF->getAttributes();
4283  if (!NestAttrs.isEmpty()) {
4284  unsigned NestArgNo = 0;
4285  Type *NestTy = nullptr;
4286  AttributeSet NestAttr;
4287 
4288  // Look for a parameter marked with the 'nest' attribute.
4289  for (FunctionType::param_iterator I = NestFTy->param_begin(),
4290  E = NestFTy->param_end();
4291  I != E; ++NestArgNo, ++I) {
4292  AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
4293  if (AS.hasAttribute(Attribute::Nest)) {
4294  // Record the parameter type and any other attributes.
4295  NestTy = *I;
4296  NestAttr = AS;
4297  break;
4298  }
4299  }
4300 
4301  if (NestTy) {
4302  Instruction *Caller = CS.getInstruction();
4303  std::vector<Value*> NewArgs;
4304  std::vector<AttributeSet> NewArgAttrs;
4305  NewArgs.reserve(CS.arg_size() + 1);
4306  NewArgAttrs.reserve(CS.arg_size());
4307 
4308  // Insert the nest argument into the call argument list, which may
4309  // mean appending it. Likewise for attributes.
4310 
4311  {
4312  unsigned ArgNo = 0;
4313  CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
4314  do {
4315  if (ArgNo == NestArgNo) {
4316  // Add the chain argument and attributes.
4317  Value *NestVal = Tramp->getArgOperand(2);
4318  if (NestVal->getType() != NestTy)
4319  NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
4320  NewArgs.push_back(NestVal);
4321  NewArgAttrs.push_back(NestAttr);
4322  }
4323 
4324  if (I == E)
4325  break;
4326 
4327  // Add the original argument and attributes.
4328  NewArgs.push_back(*I);
4329  NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
4330 
4331  ++ArgNo;
4332  ++I;
4333  } while (true);
4334  }
4335 
4336  // The trampoline may have been bitcast to a bogus type (FTy).
4337  // Handle this by synthesizing a new function type, equal to FTy
4338  // with the chain parameter inserted.
4339 
4340  std::vector<Type*> NewTypes;
4341  NewTypes.reserve(FTy->getNumParams()+1);
4342 
4343  // Insert the chain's type into the list of parameter types, which may
4344  // mean appending it.
4345  {
4346  unsigned ArgNo = 0;
4347  FunctionType::param_iterator I = FTy->param_begin(),
4348  E = FTy->param_end();
4349 
4350  do {
4351  if (ArgNo == NestArgNo)
4352  // Add the chain's type.
4353  NewTypes.push_back(NestTy);
4354 
4355  if (I == E)
4356  break;
4357 
4358  // Add the original type.
4359  NewTypes.push_back(*I);
4360 
4361  ++ArgNo;
4362  ++I;
4363  } while (true);
4364  }
4365 
4366  // Replace the trampoline call with a direct call. Let the generic
4367  // code sort out any function type mismatches.
4368  FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
4369  FTy->isVarArg());
4370  Constant *NewCallee =
4371  NestF->getType() == PointerType::getUnqual(NewFTy) ?
4372  NestF : ConstantExpr::getBitCast(NestF,
4373  PointerType::getUnqual(NewFTy));
4374  AttributeList NewPAL =
4375  AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
4376  Attrs.getRetAttributes(), NewArgAttrs);
4377 
4378  SmallVector<OperandBundleDef, 1> OpBundles;
4379  CS.getOperandBundlesAsDefs(OpBundles);
4380 
4381  Instruction *NewCaller;
4382  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4383  NewCaller = InvokeInst::Create(NewCallee,
4384  II->getNormalDest(), II->getUnwindDest(),
4385  NewArgs, OpBundles);
4386  cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
4387  cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
4388  } else {
4389  NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
4390  cast<CallInst>(NewCaller)->setTailCallKind(
4391  cast<CallInst>(Caller)->getTailCallKind());
4392  cast<CallInst>(NewCaller)->setCallingConv(
4393  cast<CallInst>(Caller)->getCallingConv());
4394  cast<CallInst>(NewCaller)->setAttributes(NewPAL);
4395  }
4396 
4397  return NewCaller;
4398  }
4399  }
4400 
4401  // Replace the trampoline call with a direct call. Since there is no 'nest'
4402  // parameter, there is no need to adjust the argument list. Let the generic
4403  // code sort out any function type mismatches.
4404  Constant *NewCallee =
4405  NestF->getType() == PTy ? NestF :
4406  ConstantExpr::getBitCast(NestF, PTy);
4407  CS.setCalledFunction(NewCallee);
4408  return CS.getInstruction();
4409 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isFPPredicate() const
Definition: InstrTypes.h:944
const NoneType None
Definition: None.h:24
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double, and whose elements are just simple data values (i.e.
Definition: Constants.h:735
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:574
uint64_t CallInst * C
User::op_iterator arg_iterator
The type of iterator to use when looping over actual arguments at this call site. ...
Definition: CallSite.h:213
LibCallSimplifier - This class implements a collection of optimizations that replace well formed call...
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:172
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMin(const Opnd0 &Op0, const Opnd1 &Op1)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction, which must be an operator which supports these flags.
void setDoesNotThrow()
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
static void ValueIsDeleted(Value *V)
Definition: Value.cpp:855
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1638
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool isZero() const
Definition: APFloat.h:1143
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:173
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:80
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1542
unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1066
static Value * simplifyX86immShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:555
DiagnosticInfoOptimizationBase::Argument NV
unsigned arg_size() const
Definition: CallSite.h:219
CallingConv::ID getCallingConv() const
Get the calling convention of the call.
Definition: CallSite.h:312
Atomic ordering constants.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index&#39;s element.
Definition: Constants.cpp:2645
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:188
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
Definition: CallSite.h:603
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMax(const Opnd0 &Op0, const Opnd1 &Op1)
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:262
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
An instruction for ordering other memory operations.
Definition: Instructions.h:440
match_zero m_Zero()
Match an arbitrary zero/null constant.
Definition: PatternMatch.h:145
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:446
Instruction * visitVACopyInst(VACopyInst &I)
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1237
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC)
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:641
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:239
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:91
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:617
iterator_range< IterTy > args() const
Definition: CallSite.h:215
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
bool hasValueHandle() const
Return true if there is a value handle associated with this value.
Definition: Value.h:487
unsigned less or equal
Definition: InstrTypes.h:879
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
unsigned less than
Definition: InstrTypes.h:878
This class represents the atomic memcpy intrinsic i.e.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:728
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
This class wraps the llvm.memset intrinsic.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:767
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:818
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1390
bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr)
Return true if it is valid to use the assumptions provided by an assume intrinsic, I, at the point in the control-flow identified by the context instruction, CxtI.
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:862
F(f)
static CallInst * Create(Value *Func, ArrayRef< Value *> Args, ArrayRef< OperandBundleDef > Bundles=None, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
const fltSemantics & getSemantics() const
Definition: APFloat.h:1155
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:520
An instruction for reading from memory.
Definition: Instructions.h:164
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:883
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:1832
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:168
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
static OverflowCheckFlavor IntrinsicIDToOverflowCheckFlavor(unsigned ID)
Returns the OverflowCheckFlavor corresponding to an overflow_with_op intrinsic.
fneg_match< LHS > m_FNeg(const LHS &L)
Match a floating point negate.
void reserve(size_type N)
Definition: SmallVector.h:380
Value * getLength() const
static Instruction * simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC)
Instruction * visitVAStartInst(VAStartInst &I)
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:528
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
bool isGCRelocate(ImmutableCallSite CS)
Definition: Statepoint.cpp:43
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
const CallInst * isFreeCall(const Value *I, const TargetLibraryInfo *TLI)
isFreeCall - Returns non-null if the value is a call to the builtin free()
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:207
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:138
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op...
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one. ...
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:968
static Instruction * SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
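A minimal sketch of the PatternMatch idiom (illustrative, not from this file); isBSwapOfNot is a hypothetical helper:

  #include "llvm/IR/PatternMatch.h"
  #include "llvm/IR/Value.h"
  using namespace llvm;
  using namespace llvm::PatternMatch;

  // Does V compute bswap(~X) for some X? On success, X is bound to the
  // inner operand.
  static bool isBSwapOfNot(Value *V, Value *&X) {
    return match(V, m_BSwap(m_Not(m_Value(X))));
  }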
Instruction * visitInvokeInst(InvokeInst &II)
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:1518
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:515
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Type * getPointerElementType() const
Definition: Type.h:373
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:951
OverflowCheckFlavor
Specific patterns of overflow check idioms that we match.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getNumArgOperands() const
Return the number of call arguments.
Value * getRawSource() const
Return the arguments to the instruction.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:981
This class wraps the llvm.memmove intrinsic.
AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const
Add an argument attribute to the list.
Definition: Attributes.h:398
Value * SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const SimplifyQuery &Q)
Given a function and iterators over arguments, fold the result or return null.
IterTy arg_end() const
Definition: CallSite.h:575
Instruction * eraseInstFromFunction(Instruction &I)
Combiner aware instruction erasure.
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:912
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
The core instruction combiner logic.
static bool isSafeToEliminateVarargsCast(const CallSite CS, const DataLayout &DL, const CastInst *const CI, const int ix)
If this cast does not affect the value passed through the varargs area, we can eliminate the use of t...
This file contains the simple types necessary to represent the attributes associated with functions a...
InstrTy * getInstruction() const
Definition: CallSite.h:92
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1556
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:286
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:966
This file implements a class to represent arbitrary precision integral constant values and operations...
not_match< LHS > m_Not(const LHS &L)
Definition: PatternMatch.h:985
All zero aggregate value.
Definition: Constants.h:332
static Value * simplifyX86vpermv(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
Metadata * LowAndHigh[]
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:100
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
DominatorTree & getDominatorTree() const
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:193
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:86
Class to represent function types.
Definition: DerivedTypes.h:103
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1448
bool isInfinity() const
Definition: APFloat.h:1144
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1409
This represents the llvm.va_start intrinsic.
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Matches FPExt.
Definition: PatternMatch.h:955
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4441
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:924
void setLength(Value *L)
AttributeSet getParamAttributes(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
bool isVarArg() const
Definition: DerivedTypes.h:123
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the call or the callee has the given attribute.
Definition: CallSite.h:377
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:194
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
Definition: IRBuilder.h:1841
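A minimal usage sketch (illustrative, not from this file); splatShiftAmount and the 8-lane width are assumptions:

  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Broadcast a scalar shift amount to eight lanes; CreateVectorSplat emits
  // the usual insertelement + shufflevector pair.
  static Value *splatShiftAmount(IRBuilder<> &Builder, Value *Scalar) {
    return Builder.CreateVectorSplat(/*NumElts=*/8, Scalar, "shift.splat");
  }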
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
AttrBuilder & remove(const AttrBuilder &B)
Remove the attributes from the builder.
static Value * simplifyX86pack(IntrinsicInst &II, bool IsSigned)
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:205
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:166
An instruction for storing to memory.
Definition: Instructions.h:306
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1328
SelectClass_match< Cond, LHS, RHS > m_Select(const Cond &C, const LHS &L, const RHS &R)
Definition: PatternMatch.h:869
static void ValueIsRAUWd(Value *Old, Value *New)
Definition: Value.cpp:908
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1387
static Value * simplifyX86vpcom(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned)
Decode XOP integer vector comparison intrinsics.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:292
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:408
static Value * simplifyX86movmsk(const IntrinsicInst &II)
amdgpu Simplify well known AMD library false Value * Callee
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:980
This class represents a truncation of integer types.
Type * getElementType() const
Return the element type of the array/vector.
Definition: Constants.cpp:2271
Value * getOperand(unsigned i) const
Definition: User.h:154
Class to represent pointers.
Definition: DerivedTypes.h:467
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
Definition: Attributes.cpp:573
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:277
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:301
const DataLayout & getDataLayout() const
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1678
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:141
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:602
bool hasAttrSomewhere(Attribute::AttrKind Kind, unsigned *Index=nullptr) const
Return true if the specified attribute is set for at least one parameter or for the return value...
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:63
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1164
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:357
void setAttributes(AttributeList PAL)
Set the parameter attributes of the call.
Definition: CallSite.h:333
Instruction * visitFenceInst(FenceInst &FI)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static Instruction * simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC)
const Value * getCalledValue() const
Get a pointer to the function that is invoked by this instruction.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:503
bool isNegative() const
Definition: APFloat.h:1147
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:281
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1306
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1049
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:421
ConstantInt * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to .objectsize into an integer value of the given Type.
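A hedged usage sketch (not from this file); foldObjectSize is a hypothetical caller, and the nullptr-on-failure behaviour assumes MustSucceed is false:

  #include "llvm/Analysis/MemoryBuiltins.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/IntrinsicInst.h"
  using namespace llvm;

  // Replace an llvm.objectsize call with the computed constant when the
  // size can be determined; otherwise leave the call alone.
  static bool foldObjectSize(IntrinsicInst *ObjSize, const DataLayout &DL,
                             const TargetLibraryInfo *TLI) {
    if (ConstantInt *Size =
            lowerObjectSizeCall(ObjSize, DL, TLI, /*MustSucceed=*/false)) {
      ObjSize->replaceAllUsesWith(Size);
      ObjSize->eraseFromParent();
      return true;
    }
    return false;
  }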
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:580
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:288
bool isNaN() const
Definition: APFloat.h:1145
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:1693
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
static cl::opt< unsigned > UnfoldElementAtomicMemcpyMaxElements("unfold-element-atomic-memcpy-max-elements", cl::init(16), cl::desc("Maximum number of elements in atomic memcpy the optimizer is " "allowed to unfold"))
unsigned getNumParams() const
Return the number of fixed parameters this function type requires.
Definition: DerivedTypes.h:139
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:382
unsigned getParamAlignment(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool isFast() const
Determine whether all fast-math-flags are set.
std::underlying_type< E >::type Underlying(E Val)
Check that Val is in range for E, and return Val cast to E's underlying type.
Definition: BitmaskEnum.h:91
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:853
static const unsigned End
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:931
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:78
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Definition: CallSite.h:316
bool isGCResult(ImmutableCallSite CS)
Definition: Statepoint.cpp:53
static FunctionType * get(Type *Result, ArrayRef< Type *> Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:297
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:443
void setNotConvergent()
Definition: CallSite.h:527
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:61
void setAlignment(unsigned Align)
static Value * simplifyX86varShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1320
const AMDGPUAS & AS
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:558
uint32_t getElementSizeInBytes() const
bool isVolatile() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1214
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:937
static InvokeInst * Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value *> Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:959
static Value * simplifyX86muldq(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
signed greater than
Definition: InstrTypes.h:880
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:244
static Value * simplifyX86extrq(IntrinsicInst &II, Value *Op0, ConstantInt *CILength, ConstantInt *CIIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding or conversion to a shuffle...
bool doesNotThrow() const
Determine if the call cannot unwind.
const APFloat & getValueAPF() const
Definition: Constants.h:294
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:918
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:452
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:163
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:240
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:178
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
static PointerType * getInt1PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:216
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:251
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the generic address space (address sp...
Definition: DerivedTypes.h:482
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
static Value * simplifyX86vpermilvar(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermilvar* to shufflevector if the mask is constant.
iterator end()
Definition: BasicBlock.h:254
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
IterTy arg_begin() const
Definition: CallSite.h:571
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Type::subtype_iterator param_iterator
Definition: DerivedTypes.h:126
bool overlaps(const AttrBuilder &B) const
Return true if the builder has any attribute that's in the specified builder.
static Instruction * simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC)
void setDoesNotThrow()
Definition: CallSite.h:508
signed less than
Definition: InstrTypes.h:882
Type * getReturnType() const
Definition: DerivedTypes.h:124
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:383
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1205
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1740
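A minimal sketch (illustrative, not from this file) of the constant-mask shufflevector that simplifications such as simplifyX86vpermilvar build; duplicateEvenLanes is hypothetical and assumes V is a 4-lane vector:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Shuffle V against undef with the constant mask <0, 0, 2, 2>.
  static Value *duplicateEvenLanes(IRBuilder<> &Builder, Value *V) {
    uint32_t MaskIdx[] = {0, 0, 2, 2};
    Constant *Mask = ConstantDataVector::get(Builder.getContext(), MaskIdx);
    return Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), Mask,
                                       "even.dup");
  }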
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:560
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:574
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:623
#define NC
Definition: regutils.h:42
CallInst * CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:362
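A minimal usage sketch (illustrative, not from this file); emitMaskedLoad, the 16-byte alignment, and the zero passthru are assumptions for the example:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Emit llvm.masked.load of VecTy from Ptr, with Cond as the per-lane mask
  // and zero in the disabled lanes.
  static Value *emitMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Cond,
                               Type *VecTy) {
    Value *Passthru = Constant::getNullValue(VecTy);
    return Builder.CreateMaskedLoad(Ptr, /*Align=*/16, Cond, Passthru,
                                    "masked.ld");
  }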
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1272
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:516
bool isDenormal() const
Definition: APFloat.h:1148
void setOperand(unsigned i, Value *Val)
Definition: User.h:159
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
signed less or equal
Definition: InstrTypes.h:883
Class to represent vector types.
Definition: DerivedTypes.h:393
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:57
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:401
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1012
static Value * simplifyX86pshufb(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert pshufb* to shufflevector if the mask is constant.
static cl::opt< bool > FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), cl::init(false))
amdgpu Simplify well known AMD library false Value Value * Arg
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:333
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate IT block based on arch"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT, "arm-no-restrict-it", "Allow IT blocks based on ARMv7")))
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:403
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
Definition: PatternMatch.h:407
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:364
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:538
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:185
static Value * simplifyMinnumMaxnum(const IntrinsicInst &II)
void setCalledFunction(Value *Fn)
Set the function called.
This class wraps the llvm.memcpy/memmove intrinsics.
static Value * simplifyMaskedLoad(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Instruction * simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:284
static bool maskIsAllOneOrUndef(Value *Mask)
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
OverflowResult
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195
unsigned greater or equal
Definition: InstrTypes.h:877
match_one m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:194
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Definition: CallSite.h:582
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:220
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: CallSite.h:505
void setArgOperand(unsigned i, Value *v)
bool isNormal() const
Definition: APFloat.h:1151
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
Value * optimizeCall(CallInst *CI)
optimizeCall - Take the given call instruction and return a more optimal value to replace the instruc...
static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID, unsigned EndID, InstCombiner &IC)
unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:234
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Type * getValueType() const
Definition: GlobalValue.h:267
static IntrinsicInst * findInitTrampoline(Value *Callee)
bool isByValOrInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed by value or in an alloca.
Definition: CallSite.h:608
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:81
AssumptionCache & getAssumptionCache() const
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:449
static PointerType * getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS=0)
Definition: Type.cpp:212
static Value * simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, APInt APLength, APInt APIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant folding or conversion to a shu...
bool isStatepoint(ImmutableCallSite CS)
Definition: Statepoint.cpp:27
static Constant * getNegativeIsTrueBoolVec(ConstantDataVector *V)
Return a constant boolean vector that has true elements in all positions where the input constant dat...
iterator_range< op_iterator > arg_operands()
Iteration adapter for range-for loops.
static Value * emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1, InstCombiner::BuilderTy &Builder)
This represents the llvm.va_copy intrinsic.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:538
match_all_ones m_AllOnes()
Match an integer or vector with all bits set to true.
Definition: PatternMatch.h:205
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
LoadInst * CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name)
Definition: IRBuilder.h:1186
static Instruction * foldCtpop(IntrinsicInst &II, InstCombiner &IC)
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
void setAlignment(unsigned Align)
This file provides internal interfaces used to implement the InstCombine.
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
AttrBuilder typeIncompatible(Type *Ty)
Which attributes cannot be applied to a type.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
AttributeSet getFnAttributes() const
The function attributes are returned.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:270
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1031
Invoke instruction.
#define DEBUG(X)
Definition: Debug.h:118
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:148
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Return true if the given value is known to be non-zero when defined.
IRTranslator LLVM IR MI
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:414
unsigned greater than
Definition: InstrTypes.h:876
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:967
void addAttribute(unsigned i, Attribute::AttrKind Kind)
adds the attribute to the list of attributes.
AttributeList getAttributes() const
Get the parameter attributes of the call.
Definition: CallSite.h:329
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2294
bool isConvergent() const
Determine if the call is convergent.
Definition: CallSite.h:521
static APInt getNullValue(unsigned numBits)
Get the '0' value.
Definition: APInt.h:562
match_nan m_NaN()
Match an arbitrary NaN constant. This includes quiet and signalling nans.
Definition: PatternMatch.h:183
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
static Constant * getMul(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2137
static Value * simplifyX86insertps(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
This class represents an extension of floating point types.
bool isEmpty() const
Return true if there are no attributes.
Definition: Attributes.h:646
Root of the metadata hierarchy.
Definition: Metadata.h:58
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
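A minimal usage sketch (illustrative, not from this file); isKnownMultipleOf4 is hypothetical and assumes V has i32 type:

  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  // Prove that an i32 value is a multiple of 4 by showing its two low bits
  // are known to be zero.
  static bool isKnownMultipleOf4(const Value *V, const DataLayout &DL) {
    KnownBits Known(32);
    computeKnownBits(V, Known, DL);
    return Known.countMinTrailingZeros() >= 2;
  }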
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
void setCalledFunction(Value *V)
Set the callee to the specified value.
Definition: CallSite.h:126
bool isSignaling() const
Definition: APFloat.h:1149
Value * getRawDest() const
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
bool use_empty() const
Definition: Value.h:328
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:984
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Type * getElementType() const
Definition: DerivedTypes.h:486
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1227
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:265
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:359
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute >> Attrs)
Create an AttributeList with the specified parameters in it.
Definition: Attributes.cpp:868
bool isLosslessCast() const
A lossless cast is one that does not alter the basic value.
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:399
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:215
signed greater or equal
Definition: InstrTypes.h:881
User * user_back()
Definition: Value.h:387
cmpResult compare(const APFloat &RHS) const
Definition: APFloat.h:1102
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:66
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:837
CallInst * CreateCall(Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1663