1 //===- InstCombineCalls.cpp -----------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visitCall and visitInvoke functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/ADT/Twine.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/GlobalVariable.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Intrinsics.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/IR/Metadata.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Statepoint.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/IR/ValueHandle.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/KnownBits.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstdint>
54 #include <cstring>
55 #include <vector>
56 
57 using namespace llvm;
58 using namespace PatternMatch;
59 
60 #define DEBUG_TYPE "instcombine"
61 
62 STATISTIC(NumSimplified, "Number of library calls simplified");
63 
64 static cl::opt<unsigned> UnfoldElementAtomicMemcpyMaxElements(
65  "unfold-element-atomic-memcpy-max-elements",
66  cl::init(16),
67  cl::desc("Maximum number of elements in atomic memcpy the optimizer is "
68  "allowed to unfold"));
69 
70 /// Return the specified type promoted as it would be to pass through a va_arg
71 /// area.
72 static Type *getPromotedType(Type *Ty) {
73  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
74  if (ITy->getBitWidth() < 32)
75  return Type::getInt32Ty(Ty->getContext());
76  }
77  return Ty;
78 }
79 
80 /// Return a constant boolean vector that has true elements in all positions
81 /// where the input constant data vector has an element with the sign bit set.
82 static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
83  SmallVector<Constant *, 32> BoolVec;
84  IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
85  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
86  Constant *Elt = V->getElementAsConstant(I);
87  assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
88  "Unexpected constant data vector element type");
89  bool Sign = V->getElementType()->isIntegerTy()
90  ? cast<ConstantInt>(Elt)->isNegative()
91  : cast<ConstantFP>(Elt)->isNegative();
92  BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
93  }
94  return ConstantVector::get(BoolVec);
95 }
96 
97 Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy(
98     ElementUnorderedAtomicMemCpyInst *AMI) {
99  // Try to unfold this intrinsic into a sequence of explicit atomic loads and
100  // stores.
101  // First check that number of elements is compile time constant.
102  auto *LengthCI = dyn_cast<ConstantInt>(AMI->getLength());
103  if (!LengthCI)
104  return nullptr;
105 
106  // Check that there are not too many elements.
107  uint64_t LengthInBytes = LengthCI->getZExtValue();
108  uint32_t ElementSizeInBytes = AMI->getElementSizeInBytes();
109  uint64_t NumElements = LengthInBytes / ElementSizeInBytes;
110  if (NumElements >= UnfoldElementAtomicMemcpyMaxElements)
111  return nullptr;
112 
113  // Only expand if there are elements to copy.
114  if (NumElements > 0) {
115  // Don't unfold into illegal integers
116  uint64_t ElementSizeInBits = ElementSizeInBytes * 8;
117  if (!getDataLayout().isLegalInteger(ElementSizeInBits))
118  return nullptr;
119 
120  // Cast source and destination to the correct type. Intrinsic input
121  // arguments are usually represented as i8*. Often operands will be
122  // explicitly cast to i8*, and we can just strip those casts instead of
123  // inserting new ones. However it's easier to rely on other InstCombine
124  // rules which will cover trivial cases anyway.
125  Value *Src = AMI->getRawSource();
126  Value *Dst = AMI->getRawDest();
127  Type *ElementPointerType =
128  Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits,
129  Src->getType()->getPointerAddressSpace());
130 
131  Value *SrcCasted = Builder.CreatePointerCast(Src, ElementPointerType,
132  "memcpy_unfold.src_casted");
133  Value *DstCasted = Builder.CreatePointerCast(Dst, ElementPointerType,
134  "memcpy_unfold.dst_casted");
135 
136  for (uint64_t i = 0; i < NumElements; ++i) {
137  // Get current element addresses
138  ConstantInt *ElementIdxCI =
139  ConstantInt::get(AMI->getContext(), APInt(64, i));
140  Value *SrcElementAddr =
141  Builder.CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
142  Value *DstElementAddr =
143  Builder.CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
144 
145  // Load from the source. Transfer alignment information and mark load as
146  // unordered atomic.
147  LoadInst *Load = Builder.CreateLoad(SrcElementAddr, "memcpy_unfold.val");
148  Load->setAtomic(AtomicOrdering::Unordered);
149  // We know the alignment of the first element. It is also guaranteed by
150  // the verifier that the element size is less than or equal to the first
151  // element's alignment, and both of these values are powers of two. This
152  // means that all subsequent accesses are at least element size aligned.
153  // TODO: We can infer better alignment but there is no evidence that this
154  // will matter.
155  Load->setAlignment(i == 0 ? AMI->getParamAlignment(1)
156  : ElementSizeInBytes);
157  Load->setDebugLoc(AMI->getDebugLoc());
158 
159  // Store loaded value via unordered atomic store.
160  StoreInst *Store = Builder.CreateStore(Load, DstElementAddr);
161  Store->setAtomic(AtomicOrdering::Unordered);
162  Store->setAlignment(i == 0 ? AMI->getParamAlignment(0)
163  : ElementSizeInBytes);
164  Store->setDebugLoc(AMI->getDebugLoc());
165  }
166  }
167 
168  // Set the number of elements of the copy to 0, it will be deleted on the
169  // next iteration.
170  AMI->setLength(Constant::getNullValue(LengthCI->getType()));
171  return AMI;
172 }
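// Illustrative example of the unfolding above (not from the original source):
// a 16-byte element-wise atomic memcpy with 4-byte elements becomes four
// unordered-atomic i32 load/store pairs, e.g.
//   call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(
//       i8* align 4 %dst, i8* align 4 %src, i32 16, i32 4)
// turns into bitcasts of %src/%dst to i32* followed, for each element, by
//   %v = load atomic i32, i32* %src.i32 unordered, align 4
//   store atomic i32 %v, i32* %dst.i32 unordered, align 4
// and the intrinsic's length is set to 0 so the call is erased afterwards.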
173 
174 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
175  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT);
176  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT);
177  unsigned MinAlign = std::min(DstAlign, SrcAlign);
178  unsigned CopyAlign = MI->getAlignment();
179 
180  if (CopyAlign < MinAlign) {
181  MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false));
182  return MI;
183  }
184 
185  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
186  // load/store.
187  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
188  if (!MemOpLength) return nullptr;
189 
190  // Source and destination pointer types are always "i8*" for intrinsic. See
191  // if the size is something we can handle with a single primitive load/store.
192  // A single load+store correctly handles overlapping memory in the memmove
193  // case.
194  uint64_t Size = MemOpLength->getLimitedValue();
195  assert(Size && "0-sized memory transferring should be removed already.");
196 
197  if (Size > 8 || (Size&(Size-1)))
198  return nullptr; // If not 1/2/4/8 bytes, exit.
199 
200  // Use an integer load+store unless we can find something better.
201  unsigned SrcAddrSp =
202  cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
203  unsigned DstAddrSp =
204  cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
205 
206  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
207  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
208  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
209 
210  // If the memcpy has metadata describing the members, see if we can get the
211  // TBAA tag describing our copy.
212  MDNode *CopyMD = nullptr;
213  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
214  if (M->getNumOperands() == 3 && M->getOperand(0) &&
215  mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
216  mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
217  M->getOperand(1) &&
218  mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
219  mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
220  Size &&
221  M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
222  CopyMD = cast<MDNode>(M->getOperand(2));
223  }
224 
225  // If the memcpy/memmove provides better alignment info than we can
226  // infer, use it.
227  SrcAlign = std::max(SrcAlign, CopyAlign);
228  DstAlign = std::max(DstAlign, CopyAlign);
229 
230  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
231  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
232  LoadInst *L = Builder.CreateLoad(Src, MI->isVolatile());
233  L->setAlignment(SrcAlign);
234  if (CopyMD)
235  L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
236  MDNode *LoopMemParallelMD =
237    MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
238  if (LoopMemParallelMD)
239    L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
240 
241  StoreInst *S = Builder.CreateStore(L, Dest, MI->isVolatile());
242  S->setAlignment(DstAlign);
243  if (CopyMD)
244  S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
245  if (LoopMemParallelMD)
246    S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
247 
248  // Set the size of the copy to 0, it will be deleted on the next iteration.
249  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
250  return MI;
251 }
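// Illustrative example of the transform above (not from the original source),
// using the pre-LLVM-7 memcpy signature that still carries an alignment arg:
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 4, i32 4, i1 false)
// becomes
//   %s32 = bitcast i8* %s to i32*
//   %d32 = bitcast i8* %d to i32*
//   %v = load i32, i32* %s32, align 4
//   store i32 %v, i32* %d32, align 4
// and the memcpy length is zeroed so the call is removed on the next iteration.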
252 
253 Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
254  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
255  if (MI->getAlignment() < Alignment) {
256  MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
257  Alignment, false));
258  return MI;
259  }
260 
261  // Extract the length and alignment and fill if they are constant.
262  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
263  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
264  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
265  return nullptr;
266  uint64_t Len = LenC->getLimitedValue();
267  Alignment = MI->getAlignment();
268  assert(Len && "0-sized memory setting should be removed already.");
269 
270  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
271  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
272  Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
273 
274  Value *Dest = MI->getDest();
275  unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
276  Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
277  Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
278 
279  // Alignment 0 is identity for alignment 1 for memset, but not store.
280  if (Alignment == 0) Alignment = 1;
281 
282  // Extract the fill value and store.
283  uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
284  StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
285  MI->isVolatile());
286  S->setAlignment(Alignment);
287 
288  // Set the size of the copy to 0, it will be deleted on the next iteration.
289  MI->setLength(Constant::getNullValue(LenC->getType()));
290  return MI;
291  }
292 
293  return nullptr;
294 }
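// Illustrative example of the memset transform above (not from the original
// source): an 8-byte memset of the byte value 1 becomes one wide store of the
// replicated pattern 0x0101010101010101:
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 1, i64 8, i32 1, i1 false)
//     -> store i64 72340172838076673, i64* %p64, align 1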
295 
296 static Value *simplifyX86immShift(const IntrinsicInst &II,
297  InstCombiner::BuilderTy &Builder) {
298  bool LogicalShift = false;
299  bool ShiftLeft = false;
300 
301  switch (II.getIntrinsicID()) {
302  default: llvm_unreachable("Unexpected intrinsic!");
303  case Intrinsic::x86_sse2_psra_d:
304  case Intrinsic::x86_sse2_psra_w:
305  case Intrinsic::x86_sse2_psrai_d:
306  case Intrinsic::x86_sse2_psrai_w:
307  case Intrinsic::x86_avx2_psra_d:
308  case Intrinsic::x86_avx2_psra_w:
309  case Intrinsic::x86_avx2_psrai_d:
310  case Intrinsic::x86_avx2_psrai_w:
311  case Intrinsic::x86_avx512_psra_q_128:
312  case Intrinsic::x86_avx512_psrai_q_128:
313  case Intrinsic::x86_avx512_psra_q_256:
314  case Intrinsic::x86_avx512_psrai_q_256:
315  case Intrinsic::x86_avx512_psra_d_512:
316  case Intrinsic::x86_avx512_psra_q_512:
317  case Intrinsic::x86_avx512_psra_w_512:
318  case Intrinsic::x86_avx512_psrai_d_512:
319  case Intrinsic::x86_avx512_psrai_q_512:
320  case Intrinsic::x86_avx512_psrai_w_512:
321  LogicalShift = false; ShiftLeft = false;
322  break;
323  case Intrinsic::x86_sse2_psrl_d:
324  case Intrinsic::x86_sse2_psrl_q:
325  case Intrinsic::x86_sse2_psrl_w:
326  case Intrinsic::x86_sse2_psrli_d:
327  case Intrinsic::x86_sse2_psrli_q:
328  case Intrinsic::x86_sse2_psrli_w:
329  case Intrinsic::x86_avx2_psrl_d:
330  case Intrinsic::x86_avx2_psrl_q:
331  case Intrinsic::x86_avx2_psrl_w:
332  case Intrinsic::x86_avx2_psrli_d:
333  case Intrinsic::x86_avx2_psrli_q:
334  case Intrinsic::x86_avx2_psrli_w:
335  case Intrinsic::x86_avx512_psrl_d_512:
336  case Intrinsic::x86_avx512_psrl_q_512:
337  case Intrinsic::x86_avx512_psrl_w_512:
338  case Intrinsic::x86_avx512_psrli_d_512:
339  case Intrinsic::x86_avx512_psrli_q_512:
340  case Intrinsic::x86_avx512_psrli_w_512:
341  LogicalShift = true; ShiftLeft = false;
342  break;
343  case Intrinsic::x86_sse2_psll_d:
344  case Intrinsic::x86_sse2_psll_q:
345  case Intrinsic::x86_sse2_psll_w:
346  case Intrinsic::x86_sse2_pslli_d:
347  case Intrinsic::x86_sse2_pslli_q:
348  case Intrinsic::x86_sse2_pslli_w:
349  case Intrinsic::x86_avx2_psll_d:
350  case Intrinsic::x86_avx2_psll_q:
351  case Intrinsic::x86_avx2_psll_w:
352  case Intrinsic::x86_avx2_pslli_d:
353  case Intrinsic::x86_avx2_pslli_q:
354  case Intrinsic::x86_avx2_pslli_w:
355  case Intrinsic::x86_avx512_psll_d_512:
356  case Intrinsic::x86_avx512_psll_q_512:
357  case Intrinsic::x86_avx512_psll_w_512:
358  case Intrinsic::x86_avx512_pslli_d_512:
359  case Intrinsic::x86_avx512_pslli_q_512:
360  case Intrinsic::x86_avx512_pslli_w_512:
361  LogicalShift = true; ShiftLeft = true;
362  break;
363  }
364  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
365 
366  // Simplify if count is constant.
367  auto Arg1 = II.getArgOperand(1);
368  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
369  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
370  auto CInt = dyn_cast<ConstantInt>(Arg1);
371  if (!CAZ && !CDV && !CInt)
372  return nullptr;
373 
374  APInt Count(64, 0);
375  if (CDV) {
376  // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
377  // operand to compute the shift amount.
378  auto VT = cast<VectorType>(CDV->getType());
379  unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
380  assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
381  unsigned NumSubElts = 64 / BitWidth;
382 
383  // Concatenate the sub-elements to create the 64-bit value.
384  for (unsigned i = 0; i != NumSubElts; ++i) {
385  unsigned SubEltIdx = (NumSubElts - 1) - i;
386  auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
387  Count <<= BitWidth;
388  Count |= SubElt->getValue().zextOrTrunc(64);
389  }
390  }
391  else if (CInt)
392  Count = CInt->getValue();
393 
394  auto Vec = II.getArgOperand(0);
395  auto VT = cast<VectorType>(Vec->getType());
396  auto SVT = VT->getElementType();
397  unsigned VWidth = VT->getNumElements();
398  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
399 
400  // If shift-by-zero then just return the original value.
401  if (Count.isNullValue())
402  return Vec;
403 
404  // Handle cases when Shift >= BitWidth.
405  if (Count.uge(BitWidth)) {
406  // If LogicalShift - just return zero.
407  if (LogicalShift)
408  return ConstantAggregateZero::get(VT);
409 
410  // If ArithmeticShift - clamp Shift to (BitWidth - 1).
411  Count = APInt(64, BitWidth - 1);
412  }
413 
414  // Get a constant vector of the same type as the first operand.
415  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
416  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
417 
418  if (ShiftLeft)
419  return Builder.CreateShl(Vec, ShiftVec);
420 
421  if (LogicalShift)
422  return Builder.CreateLShr(Vec, ShiftVec);
423 
424  return Builder.CreateAShr(Vec, ShiftVec);
425 }
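// Illustrative examples of the constant-shift simplification above (not from
// the original source):
//   @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 3)  -> ashr <4 x i32> %v, (splat 3)
//   @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 33) -> zeroinitializer
// A logical shift by >= the element width folds to zero, while an arithmetic
// shift amount is clamped to BitWidth - 1.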
426 
427 // Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
428 // Unlike the generic IR shifts, the intrinsics have defined behaviour for out
429 // of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
430 static Value *simplifyX86varShift(const IntrinsicInst &II,
431  InstCombiner::BuilderTy &Builder) {
432  bool LogicalShift = false;
433  bool ShiftLeft = false;
434 
435  switch (II.getIntrinsicID()) {
436  default: llvm_unreachable("Unexpected intrinsic!");
437  case Intrinsic::x86_avx2_psrav_d:
438  case Intrinsic::x86_avx2_psrav_d_256:
439  case Intrinsic::x86_avx512_psrav_q_128:
440  case Intrinsic::x86_avx512_psrav_q_256:
441  case Intrinsic::x86_avx512_psrav_d_512:
442  case Intrinsic::x86_avx512_psrav_q_512:
443  case Intrinsic::x86_avx512_psrav_w_128:
444  case Intrinsic::x86_avx512_psrav_w_256:
445  case Intrinsic::x86_avx512_psrav_w_512:
446  LogicalShift = false;
447  ShiftLeft = false;
448  break;
449  case Intrinsic::x86_avx2_psrlv_d:
450  case Intrinsic::x86_avx2_psrlv_d_256:
451  case Intrinsic::x86_avx2_psrlv_q:
452  case Intrinsic::x86_avx2_psrlv_q_256:
453  case Intrinsic::x86_avx512_psrlv_d_512:
454  case Intrinsic::x86_avx512_psrlv_q_512:
455  case Intrinsic::x86_avx512_psrlv_w_128:
456  case Intrinsic::x86_avx512_psrlv_w_256:
457  case Intrinsic::x86_avx512_psrlv_w_512:
458  LogicalShift = true;
459  ShiftLeft = false;
460  break;
461  case Intrinsic::x86_avx2_psllv_d:
462  case Intrinsic::x86_avx2_psllv_d_256:
463  case Intrinsic::x86_avx2_psllv_q:
464  case Intrinsic::x86_avx2_psllv_q_256:
465  case Intrinsic::x86_avx512_psllv_d_512:
466  case Intrinsic::x86_avx512_psllv_q_512:
467  case Intrinsic::x86_avx512_psllv_w_128:
468  case Intrinsic::x86_avx512_psllv_w_256:
469  case Intrinsic::x86_avx512_psllv_w_512:
470  LogicalShift = true;
471  ShiftLeft = true;
472  break;
473  }
474  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
475 
476  // Simplify if all shift amounts are constant/undef.
477  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
478  if (!CShift)
479  return nullptr;
480 
481  auto Vec = II.getArgOperand(0);
482  auto VT = cast<VectorType>(II.getType());
483  auto SVT = VT->getVectorElementType();
484  int NumElts = VT->getNumElements();
485  int BitWidth = SVT->getIntegerBitWidth();
486 
487  // Collect each element's shift amount.
488  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
489  bool AnyOutOfRange = false;
490  SmallVector<int, 8> ShiftAmts;
491  for (int I = 0; I < NumElts; ++I) {
492  auto *CElt = CShift->getAggregateElement(I);
493  if (CElt && isa<UndefValue>(CElt)) {
494  ShiftAmts.push_back(-1);
495  continue;
496  }
497 
498  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
499  if (!COp)
500  return nullptr;
501 
502  // Handle out of range shifts.
503  // If LogicalShift - set to BitWidth (special case).
504  // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
505  APInt ShiftVal = COp->getValue();
506  if (ShiftVal.uge(BitWidth)) {
507  AnyOutOfRange = LogicalShift;
508  ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
509  continue;
510  }
511 
512  ShiftAmts.push_back((int)ShiftVal.getZExtValue());
513  }
514 
515  // If all elements out of range or UNDEF, return vector of zeros/undefs.
516  // ArithmeticShift should only hit this if they are all UNDEF.
517  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
518  if (all_of(ShiftAmts, OutOfRange)) {
519  SmallVector<Constant *, 8> ConstantVec;
520  for (int Idx : ShiftAmts) {
521  if (Idx < 0) {
522  ConstantVec.push_back(UndefValue::get(SVT));
523  } else {
524  assert(LogicalShift && "Logical shift expected");
525  ConstantVec.push_back(ConstantInt::getNullValue(SVT));
526  }
527  }
528  return ConstantVector::get(ConstantVec);
529  }
530 
531  // We can't handle only some out of range values with generic logical shifts.
532  if (AnyOutOfRange)
533  return nullptr;
534 
535  // Build the shift amount constant vector.
536  SmallVector<Constant *, 8> ShiftVecAmts;
537  for (int Idx : ShiftAmts) {
538  if (Idx < 0)
539  ShiftVecAmts.push_back(UndefValue::get(SVT));
540  else
541  ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
542  }
543  auto ShiftVec = ConstantVector::get(ShiftVecAmts);
544 
545  if (ShiftLeft)
546  return Builder.CreateShl(Vec, ShiftVec);
547 
548  if (LogicalShift)
549  return Builder.CreateLShr(Vec, ShiftVec);
550 
551  return Builder.CreateAShr(Vec, ShiftVec);
552 }
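// Illustrative example of the per-element shift simplification above (not from
// the original source): when every shift amount is a constant in range,
//   @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 3>)
// becomes
//   lshr <4 x i32> %v, <i32 0, i32 1, i32 2, i32 3>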
553 
554 static Value *simplifyX86muldq(const IntrinsicInst &II,
555  InstCombiner::BuilderTy &Builder) {
556  Value *Arg0 = II.getArgOperand(0);
557  Value *Arg1 = II.getArgOperand(1);
558  Type *ResTy = II.getType();
559  assert(Arg0->getType()->getScalarSizeInBits() == 32 &&
560  Arg1->getType()->getScalarSizeInBits() == 32 &&
561  ResTy->getScalarSizeInBits() == 64 && "Unexpected muldq/muludq types");
562 
563  // muldq/muludq(undef, undef) -> zero (matches generic mul behavior)
564  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
565  return ConstantAggregateZero::get(ResTy);
566 
567  // Constant folding.
568  // PMULDQ = (mul(vXi64 sext(shuffle<0,2,..>(Arg0)),
569  // vXi64 sext(shuffle<0,2,..>(Arg1))))
570  // PMULUDQ = (mul(vXi64 zext(shuffle<0,2,..>(Arg0)),
571  // vXi64 zext(shuffle<0,2,..>(Arg1))))
572  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
573  return nullptr;
574 
575  unsigned NumElts = ResTy->getVectorNumElements();
576  assert(Arg0->getType()->getVectorNumElements() == (2 * NumElts) &&
577  Arg1->getType()->getVectorNumElements() == (2 * NumElts) &&
578  "Unexpected muldq/muludq types");
579 
580  unsigned IntrinsicID = II.getIntrinsicID();
581  bool IsSigned = (Intrinsic::x86_sse41_pmuldq == IntrinsicID ||
582  Intrinsic::x86_avx2_pmul_dq == IntrinsicID ||
583  Intrinsic::x86_avx512_pmul_dq_512 == IntrinsicID);
584 
585  SmallVector<unsigned, 16> ShuffleMask;
586  for (unsigned i = 0; i != NumElts; ++i)
587  ShuffleMask.push_back(i * 2);
588 
589  auto *LHS = Builder.CreateShuffleVector(Arg0, Arg0, ShuffleMask);
590  auto *RHS = Builder.CreateShuffleVector(Arg1, Arg1, ShuffleMask);
591 
592  if (IsSigned) {
593  LHS = Builder.CreateSExt(LHS, ResTy);
594  RHS = Builder.CreateSExt(RHS, ResTy);
595  } else {
596  LHS = Builder.CreateZExt(LHS, ResTy);
597  RHS = Builder.CreateZExt(RHS, ResTy);
598  }
599 
600  return Builder.CreateMul(LHS, RHS);
601 }
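// Illustrative example of the expansion above (not from the original source):
// with constant operands, the shuffle + zext + mul sequence constant-folds, so
//   @llvm.x86.sse2.pmulu.dq(<4 x i32> <i32 1, i32 2, i32 3, i32 4>,
//                           <4 x i32> <i32 5, i32 6, i32 7, i32 8>)
// multiplies lanes 0 and 2 (1*5 and 3*7) and yields <2 x i64> <i64 5, i64 21>.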
602 
603 static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
604  Value *Arg0 = II.getArgOperand(0);
605  Value *Arg1 = II.getArgOperand(1);
606  Type *ResTy = II.getType();
607 
608  // Fast all undef handling.
609  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
610  return UndefValue::get(ResTy);
611 
612  Type *ArgTy = Arg0->getType();
613  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
614  unsigned NumDstElts = ResTy->getVectorNumElements();
615  unsigned NumSrcElts = ArgTy->getVectorNumElements();
616  assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
617 
618  unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
619  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
620  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
621  assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
622  "Unexpected packing types");
623 
624  // Constant folding.
625  auto *Cst0 = dyn_cast<Constant>(Arg0);
626  auto *Cst1 = dyn_cast<Constant>(Arg1);
627  if (!Cst0 || !Cst1)
628  return nullptr;
629 
630  SmallVector<Constant *, 32> Vals;
631  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
632  for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
633  unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
634  auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
635  auto *COp = Cst->getAggregateElement(SrcIdx);
636  if (COp && isa<UndefValue>(COp)) {
637  Vals.push_back(UndefValue::get(ResTy->getScalarType()));
638  continue;
639  }
640 
641  auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
642  if (!CInt)
643  return nullptr;
644 
645  APInt Val = CInt->getValue();
646  assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
647  "Unexpected constant bitwidth");
648 
649  if (IsSigned) {
650  // PACKSS: Truncate signed value with signed saturation.
651  // Source values less than dst minint are saturated to minint.
652  // Source values greater than dst maxint are saturated to maxint.
653  if (Val.isSignedIntN(DstScalarSizeInBits))
654  Val = Val.trunc(DstScalarSizeInBits);
655  else if (Val.isNegative())
656  Val = APInt::getSignedMinValue(DstScalarSizeInBits);
657  else
658  Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
659  } else {
660  // PACKUS: Truncate signed value with unsigned saturation.
661  // Source values less than zero are saturated to zero.
662  // Source values greater than dst maxuint are saturated to maxuint.
663  if (Val.isIntN(DstScalarSizeInBits))
664  Val = Val.trunc(DstScalarSizeInBits);
665  else if (Val.isNegative())
666  Val = APInt::getNullValue(DstScalarSizeInBits);
667  else
668  Val = APInt::getAllOnesValue(DstScalarSizeInBits);
669  }
670 
671  Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
672  }
673  }
674 
675  return ConstantVector::get(Vals);
676 }
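// Illustrative example of the constant folding above (not from the original
// source): packing saturates each wide source element into the narrower
// destination type, so
//   @llvm.x86.sse2.packssdw.128(<4 x i32> <i32 70000, i32 -70000, i32 1, i32 -1>,
//                               <4 x i32> zeroinitializer)
// folds to <8 x i16> <i16 32767, i16 -32768, i16 1, i16 -1, i16 0, i16 0, i16 0, i16 0>.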
677 
678 static Value *simplifyX86movmsk(const IntrinsicInst &II) {
679  Value *Arg = II.getArgOperand(0);
680  Type *ResTy = II.getType();
681  Type *ArgTy = Arg->getType();
682 
683  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
684  if (isa<UndefValue>(Arg))
685  return Constant::getNullValue(ResTy);
686 
687  // We can't easily peek through x86_mmx types.
688  if (!ArgTy->isVectorTy())
689  return nullptr;
690 
691  auto *C = dyn_cast<Constant>(Arg);
692  if (!C)
693  return nullptr;
694 
695  // Extract signbits of the vector input and pack into integer result.
696  APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
697  for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
698  auto *COp = C->getAggregateElement(I);
699  if (!COp)
700  return nullptr;
701  if (isa<UndefValue>(COp))
702  continue;
703 
704  auto *CInt = dyn_cast<ConstantInt>(COp);
705  auto *CFp = dyn_cast<ConstantFP>(COp);
706  if (!CInt && !CFp)
707  return nullptr;
708 
709  if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
710  Result.setBit(I);
711  }
712 
713  return Constant::getIntegerValue(ResTy, Result);
714 }
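// Illustrative example of the movmsk folding above (not from the original
// source): the result packs the sign bits of the constant elements, so
//   @llvm.x86.sse.movmsk.ps(<4 x float> <float -1.0, float 1.0, float -2.0, float 3.0>)
// folds to i32 5 (sign bits set for elements 0 and 2).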
715 
716 static Value *simplifyX86insertps(const IntrinsicInst &II,
717  InstCombiner::BuilderTy &Builder) {
718  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
719  if (!CInt)
720  return nullptr;
721 
722  VectorType *VecTy = cast<VectorType>(II.getType());
723  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
724 
725  // The immediate permute control byte looks like this:
726  // [3:0] - zero mask for each 32-bit lane
727  // [5:4] - select one 32-bit destination lane
728  // [7:6] - select one 32-bit source lane
729 
730  uint8_t Imm = CInt->getZExtValue();
731  uint8_t ZMask = Imm & 0xf;
732  uint8_t DestLane = (Imm >> 4) & 0x3;
733  uint8_t SourceLane = (Imm >> 6) & 0x3;
734 
735  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
736 
737  // If all zero mask bits are set, this was just a weird way to
738  // generate a zero vector.
739  if (ZMask == 0xf)
740  return ZeroVector;
741 
742  // Initialize by passing all of the first source bits through.
743  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };
744 
745  // We may replace the second operand with the zero vector.
746  Value *V1 = II.getArgOperand(1);
747 
748  if (ZMask) {
749  // If the zero mask is being used with a single input or the zero mask
750  // overrides the destination lane, this is a shuffle with the zero vector.
751  if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
752  (ZMask & (1 << DestLane))) {
753  V1 = ZeroVector;
754  // We may still move 32-bits of the first source vector from one lane
755  // to another.
756  ShuffleMask[DestLane] = SourceLane;
757  // The zero mask may override the previous insert operation.
758  for (unsigned i = 0; i < 4; ++i)
759  if ((ZMask >> i) & 0x1)
760  ShuffleMask[i] = i + 4;
761  } else {
762  // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
763  return nullptr;
764  }
765  } else {
766  // Replace the selected destination lane with the selected source lane.
767  ShuffleMask[DestLane] = SourceLane + 4;
768  }
769 
770  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
771 }
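// Illustrative example of the insertps transform above (not from the original
// source): with control byte 0x40 (source lane 1, destination lane 0, no zero
// mask),
//   @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %b, i8 64)
// becomes
//   shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 5, i32 1, i32 2, i32 3>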
772 
773 /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
774 /// or conversion to a shuffle vector.
775 static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
776  ConstantInt *CILength, ConstantInt *CIIndex,
777  InstCombiner::BuilderTy &Builder) {
778  auto LowConstantHighUndef = [&](uint64_t Val) {
779  Type *IntTy64 = Type::getInt64Ty(II.getContext());
780  Constant *Args[] = {ConstantInt::get(IntTy64, Val),
781  UndefValue::get(IntTy64)};
782  return ConstantVector::get(Args);
783  };
784 
785  // See if we're dealing with constant values.
786  Constant *C0 = dyn_cast<Constant>(Op0);
787  ConstantInt *CI0 =
788  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
789  : nullptr;
790 
791  // Attempt to constant fold.
792  if (CILength && CIIndex) {
793  // From AMD documentation: "The bit index and field length are each six
794  // bits in length other bits of the field are ignored."
795  APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
796  APInt APLength = CILength->getValue().zextOrTrunc(6);
797 
798  unsigned Index = APIndex.getZExtValue();
799 
800  // From AMD documentation: "a value of zero in the field length is
801  // defined as length of 64".
802  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
803 
804  // From AMD documentation: "If the sum of the bit index + length field
805  // is greater than 64, the results are undefined".
806  unsigned End = Index + Length;
807 
808  // Note that both field index and field length are 8-bit quantities.
809  // Since variables 'Index' and 'Length' are unsigned values
810  // obtained from zero-extending field index and field length
811  // respectively, their sum should never wrap around.
812  if (End > 64)
813  return UndefValue::get(II.getType());
814 
815  // If we are inserting whole bytes, we can convert this to a shuffle.
816  // Lowering can recognize EXTRQI shuffle masks.
817  if ((Length % 8) == 0 && (Index % 8) == 0) {
818  // Convert bit indices to byte indices.
819  Length /= 8;
820  Index /= 8;
821 
822  Type *IntTy8 = Type::getInt8Ty(II.getContext());
823  Type *IntTy32 = Type::getInt32Ty(II.getContext());
824  VectorType *ShufTy = VectorType::get(IntTy8, 16);
825 
826  SmallVector<Constant *, 16> ShuffleMask;
827  for (int i = 0; i != (int)Length; ++i)
828  ShuffleMask.push_back(
829  Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
830  for (int i = Length; i != 8; ++i)
831  ShuffleMask.push_back(
832  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
833  for (int i = 8; i != 16; ++i)
834  ShuffleMask.push_back(UndefValue::get(IntTy32));
835 
836  Value *SV = Builder.CreateShuffleVector(
837  Builder.CreateBitCast(Op0, ShufTy),
838  ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
839  return Builder.CreateBitCast(SV, II.getType());
840  }
841 
842  // Constant Fold - shift Index'th bit to lowest position and mask off
843  // Length bits.
844  if (CI0) {
845  APInt Elt = CI0->getValue();
846  Elt.lshrInPlace(Index);
847  Elt = Elt.zextOrTrunc(Length);
848  return LowConstantHighUndef(Elt.getZExtValue());
849  }
850 
851  // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
852  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
853  Value *Args[] = {Op0, CILength, CIIndex};
854  Module *M = II.getModule();
855  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
856  return Builder.CreateCall(F, Args);
857  }
858  }
859 
860  // Constant Fold - extraction from zero is always {zero, undef}.
861  if (CI0 && CI0->isZero())
862  return LowConstantHighUndef(0);
863 
864  return nullptr;
865 }
866 
867 /// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
868 /// folding or conversion to a shuffle vector.
869 static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
870  APInt APLength, APInt APIndex,
871  InstCombiner::BuilderTy &Builder) {
872  // From AMD documentation: "The bit index and field length are each six bits
873  // in length other bits of the field are ignored."
874  APIndex = APIndex.zextOrTrunc(6);
875  APLength = APLength.zextOrTrunc(6);
876 
877  // Attempt to constant fold.
878  unsigned Index = APIndex.getZExtValue();
879 
880  // From AMD documentation: "a value of zero in the field length is
881  // defined as length of 64".
882  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
883 
884  // From AMD documentation: "If the sum of the bit index + length field
885  // is greater than 64, the results are undefined".
886  unsigned End = Index + Length;
887 
888  // Note that both field index and field length are 8-bit quantities.
889  // Since variables 'Index' and 'Length' are unsigned values
890  // obtained from zero-extending field index and field length
891  // respectively, their sum should never wrap around.
892  if (End > 64)
893  return UndefValue::get(II.getType());
894 
895  // If we are inserting whole bytes, we can convert this to a shuffle.
896  // Lowering can recognize INSERTQI shuffle masks.
897  if ((Length % 8) == 0 && (Index % 8) == 0) {
898  // Convert bit indices to byte indices.
899  Length /= 8;
900  Index /= 8;
901 
902  Type *IntTy8 = Type::getInt8Ty(II.getContext());
903  Type *IntTy32 = Type::getInt32Ty(II.getContext());
904  VectorType *ShufTy = VectorType::get(IntTy8, 16);
905 
906  SmallVector<Constant *, 16> ShuffleMask;
907  for (int i = 0; i != (int)Index; ++i)
908  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
909  for (int i = 0; i != (int)Length; ++i)
910  ShuffleMask.push_back(
911  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
912  for (int i = Index + Length; i != 8; ++i)
913  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
914  for (int i = 8; i != 16; ++i)
915  ShuffleMask.push_back(UndefValue::get(IntTy32));
916 
917  Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
918  Builder.CreateBitCast(Op1, ShufTy),
919  ConstantVector::get(ShuffleMask));
920  return Builder.CreateBitCast(SV, II.getType());
921  }
922 
923  // See if we're dealing with constant values.
924  Constant *C0 = dyn_cast<Constant>(Op0);
925  Constant *C1 = dyn_cast<Constant>(Op1);
926  ConstantInt *CI00 =
927  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
928  : nullptr;
929  ConstantInt *CI10 =
930  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
931  : nullptr;
932 
933  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
934  if (CI00 && CI10) {
935  APInt V00 = CI00->getValue();
936  APInt V10 = CI10->getValue();
937  APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
938  V00 = V00 & ~Mask;
939  V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
940  APInt Val = V00 | V10;
941  Type *IntTy64 = Type::getInt64Ty(II.getContext());
942  Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
943  UndefValue::get(IntTy64)};
944  return ConstantVector::get(Args);
945  }
946 
947  // If we were an INSERTQ call, we'll save demanded elements if we convert to
948  // INSERTQI.
949  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
950  Type *IntTy8 = Type::getInt8Ty(II.getContext());
951  Constant *CILength = ConstantInt::get(IntTy8, Length, false);
952  Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
953 
954  Value *Args[] = {Op0, Op1, CILength, CIIndex};
955  Module *M = II.getModule();
956  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
957  return Builder.CreateCall(F, Args);
958  }
959 
960  return nullptr;
961 }
962 
963 /// Attempt to convert pshufb* to shufflevector if the mask is constant.
964 static Value *simplifyX86pshufb(const IntrinsicInst &II,
965  InstCombiner::BuilderTy &Builder) {
966  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
967  if (!V)
968  return nullptr;
969 
970  auto *VecTy = cast<VectorType>(II.getType());
971  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
972  unsigned NumElts = VecTy->getNumElements();
973  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
974  "Unexpected number of elements in shuffle mask!");
975 
976  // Construct a shuffle mask from constant integers or UNDEFs.
977  Constant *Indexes[64] = {nullptr};
978 
979  // Each byte in the shuffle control mask forms an index to permute the
980  // corresponding byte in the destination operand.
981  for (unsigned I = 0; I < NumElts; ++I) {
982  Constant *COp = V->getAggregateElement(I);
983  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
984  return nullptr;
985 
986  if (isa<UndefValue>(COp)) {
987  Indexes[I] = UndefValue::get(MaskEltTy);
988  continue;
989  }
990 
991  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
992 
993  // If the most significant bit (bit[7]) of each byte of the shuffle
994  // control mask is set, then zero is written in the result byte.
995  // The zero vector is in the right-hand side of the resulting
996  // shufflevector.
997 
998  // The value of each index for the high 128-bit lane is the least
999  // significant 4 bits of the respective shuffle control byte.
1000  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
1001  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1002  }
1003 
1004  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1005  auto V1 = II.getArgOperand(0);
1006  auto V2 = Constant::getNullValue(VecTy);
1007  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1008 }
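// Illustrative note on the pshufb transform above (not from the original
// source): a constant mask becomes a shufflevector against a zero vector, and
// any mask byte with its top bit set selects an element of that zero vector
// (shuffle index >= NumElts, e.g. >= 16 for the 128-bit variant).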
1009 
1010 /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
1011 static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
1012  InstCombiner::BuilderTy &Builder) {
1013  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
1014  if (!V)
1015  return nullptr;
1016 
1017  auto *VecTy = cast<VectorType>(II.getType());
1018  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1019  unsigned NumElts = VecTy->getVectorNumElements();
1020  bool IsPD = VecTy->getScalarType()->isDoubleTy();
1021  unsigned NumLaneElts = IsPD ? 2 : 4;
1022  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
1023 
1024  // Construct a shuffle mask from constant integers or UNDEFs.
1025  Constant *Indexes[16] = {nullptr};
1026 
1027  // The intrinsics only read one or two bits, clear the rest.
1028  for (unsigned I = 0; I < NumElts; ++I) {
1029  Constant *COp = V->getAggregateElement(I);
1030  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1031  return nullptr;
1032 
1033  if (isa<UndefValue>(COp)) {
1034  Indexes[I] = UndefValue::get(MaskEltTy);
1035  continue;
1036  }
1037 
1038  APInt Index = cast<ConstantInt>(COp)->getValue();
1039  Index = Index.zextOrTrunc(32).getLoBits(2);
1040 
1041  // The PD variants use bit 1 to select the per-lane element index, so
1042  // shift down to convert to generic shuffle mask index.
1043  if (IsPD)
1044  Index.lshrInPlace(1);
1045 
1046  // The _256 variants are a bit trickier since the mask bits always index
1047  // into the corresponding 128 half. In order to convert to a generic
1048  // shuffle, we have to make that explicit.
1049  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
1050 
1051  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1052  }
1053 
1054  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1055  auto V1 = II.getArgOperand(0);
1056  auto V2 = UndefValue::get(V1->getType());
1057  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1058 }
1059 
1060 /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
1061 static Value *simplifyX86vpermv(const IntrinsicInst &II,
1062  InstCombiner::BuilderTy &Builder) {
1063  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1064  if (!V)
1065  return nullptr;
1066 
1067  auto *VecTy = cast<VectorType>(II.getType());
1068  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1069  unsigned Size = VecTy->getNumElements();
1070  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
1071  "Unexpected shuffle mask size");
1072 
1073  // Construct a shuffle mask from constant integers or UNDEFs.
1074  Constant *Indexes[64] = {nullptr};
1075 
1076  for (unsigned I = 0; I < Size; ++I) {
1077  Constant *COp = V->getAggregateElement(I);
1078  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1079  return nullptr;
1080 
1081  if (isa<UndefValue>(COp)) {
1082  Indexes[I] = UndefValue::get(MaskEltTy);
1083  continue;
1084  }
1085 
1086  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
1087  Index &= Size - 1;
1088  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1089  }
1090 
1091  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
1092  auto V1 = II.getArgOperand(0);
1093  auto V2 = UndefValue::get(VecTy);
1094  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1095 }
1096 
1097 /// Decode XOP integer vector comparison intrinsics.
1098 static Value *simplifyX86vpcom(const IntrinsicInst &II,
1099  InstCombiner::BuilderTy &Builder,
1100  bool IsSigned) {
1101  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
1102  uint64_t Imm = CInt->getZExtValue() & 0x7;
1103  VectorType *VecTy = cast<VectorType>(II.getType());
1104  CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
1105 
1106  switch (Imm) {
1107  case 0x0:
1108  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1109  break;
1110  case 0x1:
1111  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1112  break;
1113  case 0x2:
1114  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1115  break;
1116  case 0x3:
1117  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1118  break;
1119  case 0x4:
1120  Pred = ICmpInst::ICMP_EQ; break;
1121  case 0x5:
1122  Pred = ICmpInst::ICMP_NE; break;
1123  case 0x6:
1124  return ConstantInt::getSigned(VecTy, 0); // FALSE
1125  case 0x7:
1126  return ConstantInt::getSigned(VecTy, -1); // TRUE
1127  }
1128 
1129  if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
1130  II.getArgOperand(1)))
1131  return Builder.CreateSExtOrTrunc(Cmp, VecTy);
1132  }
1133  return nullptr;
1134 }
1135 
1136 // Emit a select instruction and appropriate bitcasts to help simplify
1137 // masked intrinsics.
1138 static Value *emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1,
1139  InstCombiner::BuilderTy &Builder) {
1140  unsigned VWidth = Op0->getType()->getVectorNumElements();
1141 
1142  // If the mask is all ones we don't need the select. But we need to check
1143  // only the bits that will be used in case VWidth is less than 8.
1144  if (auto *C = dyn_cast<ConstantInt>(Mask))
1145  if (C->getValue().zextOrTrunc(VWidth).isAllOnesValue())
1146  return Op0;
1147 
1148  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
1149  cast<IntegerType>(Mask->getType())->getBitWidth());
1150  Mask = Builder.CreateBitCast(Mask, MaskTy);
1151 
1152  // If we have less than 8 elements, then the starting mask was an i8 and
1153  // we need to extract down to the right number of elements.
1154  if (VWidth < 8) {
1155  uint32_t Indices[4];
1156  for (unsigned i = 0; i != VWidth; ++i)
1157  Indices[i] = i;
1158  Mask = Builder.CreateShuffleVector(Mask, Mask,
1159  makeArrayRef(Indices, VWidth),
1160  "extract");
1161  }
1162 
1163  return Builder.CreateSelect(Mask, Op0, Op1);
1164 }
1165 
1166 static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
1167  Value *Arg0 = II.getArgOperand(0);
1168  Value *Arg1 = II.getArgOperand(1);
1169 
1170  // fmin(x, x) -> x
1171  if (Arg0 == Arg1)
1172  return Arg0;
1173 
1174  const auto *C1 = dyn_cast<ConstantFP>(Arg1);
1175 
1176  // fmin(x, nan) -> x
1177  if (C1 && C1->isNaN())
1178  return Arg0;
1179 
1180  // This is the value because if undef were NaN, we would return the other
1181  // value and cannot return a NaN unless both operands are.
1182  //
1183  // fmin(undef, x) -> x
1184  if (isa<UndefValue>(Arg0))
1185  return Arg1;
1186 
1187  // fmin(x, undef) -> x
1188  if (isa<UndefValue>(Arg1))
1189  return Arg0;
1190 
1191  Value *X = nullptr;
1192  Value *Y = nullptr;
1193  if (II.getIntrinsicID() == Intrinsic::minnum) {
1194  // fmin(x, fmin(x, y)) -> fmin(x, y)
1195  // fmin(y, fmin(x, y)) -> fmin(x, y)
1196  if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
1197  if (Arg0 == X || Arg0 == Y)
1198  return Arg1;
1199  }
1200 
1201  // fmin(fmin(x, y), x) -> fmin(x, y)
1202  // fmin(fmin(x, y), y) -> fmin(x, y)
1203  if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
1204  if (Arg1 == X || Arg1 == Y)
1205  return Arg0;
1206  }
1207 
1208  // TODO: fmin(nnan x, inf) -> x
1209  // TODO: fmin(nnan ninf x, flt_max) -> x
1210  if (C1 && C1->isInfinity()) {
1211  // fmin(x, -inf) -> -inf
1212  if (C1->isNegative())
1213  return Arg1;
1214  }
1215  } else {
1217  // fmax(x, fmax(x, y)) -> fmax(x, y)
1218  // fmax(y, fmax(x, y)) -> fmax(x, y)
1219  if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
1220  if (Arg0 == X || Arg0 == Y)
1221  return Arg1;
1222  }
1223 
1224  // fmax(fmax(x, y), x) -> fmax(x, y)
1225  // fmax(fmax(x, y), y) -> fmax(x, y)
1226  if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
1227  if (Arg1 == X || Arg1 == Y)
1228  return Arg0;
1229  }
1230 
1231  // TODO: fmax(nnan x, -inf) -> x
1232  // TODO: fmax(nnan ninf x, -flt_max) -> x
1233  if (C1 && C1->isInfinity()) {
1234  // fmax(x, inf) -> inf
1235  if (!C1->isNegative())
1236  return Arg1;
1237  }
1238  }
1239  return nullptr;
1240 }
1241 
1242 static bool maskIsAllOneOrUndef(Value *Mask) {
1243  auto *ConstMask = dyn_cast<Constant>(Mask);
1244  if (!ConstMask)
1245  return false;
1246  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1247  return true;
1248  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
1249  ++I) {
1250  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1251  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1252  continue;
1253  return false;
1254  }
1255  return true;
1256 }
1257 
1258 static Value *simplifyMaskedLoad(const IntrinsicInst &II,
1259  InstCombiner::BuilderTy &Builder) {
1260  // If the mask is all ones or undefs, this is a plain vector load of the 1st
1261  // argument.
1262  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
1263  Value *LoadPtr = II.getArgOperand(0);
1264  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
1265  return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
1266  }
1267 
1268  return nullptr;
1269 }
1270 
1271 static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1272  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1273  if (!ConstMask)
1274  return nullptr;
1275 
1276  // If the mask is all zeros, this instruction does nothing.
1277  if (ConstMask->isNullValue())
1278  return IC.eraseInstFromFunction(II);
1279 
1280  // If the mask is all ones, this is a plain vector store of the 1st argument.
1281  if (ConstMask->isAllOnesValue()) {
1282  Value *StorePtr = II.getArgOperand(1);
1283  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
1284  return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
1285  }
1286 
1287  return nullptr;
1288 }
1289 
1290 static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
1291  // If the mask is all zeros, return the "passthru" argument of the gather.
1292  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
1293  if (ConstMask && ConstMask->isNullValue())
1294  return IC.replaceInstUsesWith(II, II.getArgOperand(3));
1295 
1296  return nullptr;
1297 }
1298 
1299 static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
1300  // If the mask is all zeros, a scatter does nothing.
1301  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1302  if (ConstMask && ConstMask->isNullValue())
1303  return IC.eraseInstFromFunction(II);
1304 
1305  return nullptr;
1306 }
1307 
1308 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
1309  assert((II.getIntrinsicID() == Intrinsic::cttz ||
1310  II.getIntrinsicID() == Intrinsic::ctlz) &&
1311  "Expected cttz or ctlz intrinsic");
1312  Value *Op0 = II.getArgOperand(0);
1313 
1314  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
1315 
1316  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
1317  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
1318  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
1319  : Known.countMaxLeadingZeros();
1320  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
1321  : Known.countMinLeadingZeros();
1322 
1323  // If all bits above (ctlz) or below (cttz) the first known one are known
1324  // zero, this value is constant.
1325  // FIXME: This should be in InstSimplify because we're replacing an
1326  // instruction with a constant.
1327  if (PossibleZeros == DefiniteZeros) {
1328  auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
1329  return IC.replaceInstUsesWith(II, C);
1330  }
1331 
1332  // If the input to cttz/ctlz is known to be non-zero,
1333  // then change the 'ZeroIsUndef' parameter to 'true'
1334  // because we know the zero behavior can't affect the result.
1335  if (!Known.One.isNullValue() ||
1336  isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
1337  &IC.getDominatorTree())) {
1338  if (!match(II.getArgOperand(1), m_One())) {
1339  II.setOperand(1, IC.Builder.getTrue());
1340  return &II;
1341  }
1342  }
1343 
1344  // Add range metadata since known bits can't completely reflect what we know.
1345  // TODO: Handle splat vectors.
1346  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1347  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1348  Metadata *LowAndHigh[] = {
1349  ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
1350  ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
1351  II.setMetadata(LLVMContext::MD_range,
1352  MDNode::get(II.getContext(), LowAndHigh));
1353  return &II;
1354  }
1355 
1356  return nullptr;
1357 }
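// Illustrative example of the cttz/ctlz handling above (not from the original
// source): if %x is known non-zero, the is-zero-undef flag can be set,
//   %c = call i32 @llvm.cttz.i32(i32 %x, i1 false)
//     -> %c = call i32 @llvm.cttz.i32(i32 %x, i1 true)
// and !range metadata derived from the known bits is attached on a later visit.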
1358 
1359 static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
1360  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
1361  "Expected ctpop intrinsic");
1362  Value *Op0 = II.getArgOperand(0);
1363  // FIXME: Try to simplify vectors of integers.
1364  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1365  if (!IT)
1366  return nullptr;
1367 
1368  unsigned BitWidth = IT->getBitWidth();
1369  KnownBits Known(BitWidth);
1370  IC.computeKnownBits(Op0, Known, 0, &II);
1371 
1372  unsigned MinCount = Known.countMinPopulation();
1373  unsigned MaxCount = Known.countMaxPopulation();
1374 
1375  // Add range metadata since known bits can't completely reflect what we know.
1376  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1377  Metadata *LowAndHigh[] = {
1378  ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
1379  ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
1380  II.setMetadata(LLVMContext::MD_range,
1381  MDNode::get(II.getContext(), LowAndHigh));
1382  return &II;
1383  }
1384 
1385  return nullptr;
1386 }
1387 
1388 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1389 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1390 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1391 static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
1392  Value *Ptr = II.getOperand(0);
1393  Value *Mask = II.getOperand(1);
1394  Constant *ZeroVec = Constant::getNullValue(II.getType());
1395 
1396  // Special case a zero mask since that's not a ConstantDataVector.
1397  // This masked load instruction creates a zero vector.
1398  if (isa<ConstantAggregateZero>(Mask))
1399  return IC.replaceInstUsesWith(II, ZeroVec);
1400 
1401  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1402  if (!ConstMask)
1403  return nullptr;
1404 
1405  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1406  // to allow target-independent optimizations.
1407 
1408  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1409  // the LLVM intrinsic definition for the pointer argument.
1410  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1411  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
1412  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1413 
1414  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1415  // on each element's most significant bit (the sign bit).
1416  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1417 
1418  // The pass-through vector for an x86 masked load is a zero vector.
1419  CallInst *NewMaskedLoad =
1420  IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
1421  return IC.replaceInstUsesWith(II, NewMaskedLoad);
1422 }
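// Illustrative example of the conversion above (not from the original source);
// the exact generic-intrinsic mangling is an assumption:
//   @llvm.x86.avx.maskload.ps(i8* %p, <4 x i32> <i32 -1, i32 0, i32 -1, i32 0>)
// becomes
//   @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %vp, i32 1,
//       <4 x i1> <i1 true, i1 false, i1 true, i1 false>,
//       <4 x float> zeroinitializer)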
1423 
1424 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1425 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1426 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1427 static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1428  Value *Ptr = II.getOperand(0);
1429  Value *Mask = II.getOperand(1);
1430  Value *Vec = II.getOperand(2);
1431 
1432  // Special case a zero mask since that's not a ConstantDataVector:
1433  // this masked store instruction does nothing.
1434  if (isa<ConstantAggregateZero>(Mask)) {
1435  IC.eraseInstFromFunction(II);
1436  return true;
1437  }
1438 
1439  // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
1440  // anything else at this level.
1441  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
1442  return false;
1443 
1444  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1445  if (!ConstMask)
1446  return false;
1447 
1448  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1449  // to allow target-independent optimizations.
1450 
1451  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1452  // the LLVM intrinsic definition for the pointer argument.
1453  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1454  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
1455  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1456 
1457  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1458  // on each element's most significant bit (the sign bit).
1459  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1460 
1461  IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
1462 
1463  // 'Replace uses' doesn't work for stores. Erase the original masked store.
1464  IC.eraseInstFromFunction(II);
1465  return true;
1466 }
1467 
1468 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
1469 //
1470 // A single NaN input is folded to minnum, so we rely on that folding for
1471 // handling NaNs.
1472 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
1473  const APFloat &Src2) {
1474  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
1475 
1476  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
1477  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
1478  if (Cmp0 == APFloat::cmpEqual)
1479  return maxnum(Src1, Src2);
1480 
1481  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
1482  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
1483  if (Cmp1 == APFloat::cmpEqual)
1484  return maxnum(Src0, Src2);
1485 
1486  return maxnum(Src0, Src1);
1487 }
1488 
1489 // Returns true iff the 2 intrinsics have the same operands, limiting the
1490 // comparison to the first NumOperands.
1491 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
1492  unsigned NumOperands) {
1493  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
1494  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
1495  for (unsigned i = 0; i < NumOperands; i++)
1496  if (I.getArgOperand(i) != E.getArgOperand(i))
1497  return false;
1498  return true;
1499 }
1500 
1501 // Remove trivially empty start/end intrinsic ranges, i.e. a start
1502 // immediately followed by an end (ignoring debuginfo or other
1503 // start/end intrinsics in between). As this handles only the most trivial
1504 // cases, tracking the nesting level is not needed:
1505 //
1506 // call @llvm.foo.start(i1 0) ; &I
1507 // call @llvm.foo.start(i1 0)
1508 // call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
1509 // call @llvm.foo.end(i1 0)
1510 static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
1511  unsigned EndID, InstCombiner &IC) {
1512  assert(I.getIntrinsicID() == StartID &&
1513  "Start intrinsic does not have expected ID");
1514  BasicBlock::iterator BI(I), BE(I.getParent()->end());
1515  for (++BI; BI != BE; ++BI) {
1516  if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
1517  if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
1518  continue;
1519  if (E->getIntrinsicID() == EndID &&
1520  haveSameOperands(I, *E, E->getNumArgOperands())) {
1521  IC.eraseInstFromFunction(*E);
1522  IC.eraseInstFromFunction(I);
1523  return true;
1524  }
1525  }
1526  break;
1527  }
1528 
1529  return false;
1530 }
1531 
1532 // Convert NVVM intrinsics to target-generic LLVM code where possible.
1533 static Instruction *simplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
1534  // Each NVVM intrinsic we can simplify can be replaced with one of:
1535  //
1536  // * an LLVM intrinsic,
1537  // * an LLVM cast operation,
1538  // * an LLVM binary operation, or
1539  // * ad-hoc LLVM IR for the particular operation.
1540 
1541  // Some transformations are only valid when the module's
1542  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
1543  // transformations are valid regardless of the module's ftz setting.
1544  enum FtzRequirementTy {
1545  FTZ_Any, // Any ftz setting is ok.
1546  FTZ_MustBeOn, // Transformation is valid only if ftz is on.
1547  FTZ_MustBeOff, // Transformation is valid only if ftz is off.
1548  };
1549  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
1550  // target-generic intrinsic, cast op, or binary op but that we can nonetheless
1551  // simplify.
1552  enum SpecialCase {
1553  SPC_Reciprocal,
1554  };
1555 
1556  // SimplifyAction is a poor-man's variant (plus an additional flag) that
1557  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
1558  struct SimplifyAction {
1559  // Invariant: At most one of these Optionals has a value.
1560  Optional<Intrinsic::ID> IID;
1561  Optional<Instruction::CastOps> CastOp;
1562  Optional<Instruction::BinaryOps> BinaryOp;
1563  Optional<SpecialCase> Special;
1564 
1565  FtzRequirementTy FtzRequirement = FTZ_Any;
1566 
1567  SimplifyAction() = default;
1568 
1569  SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
1570  : IID(IID), FtzRequirement(FtzReq) {}
1571 
1572  // Cast operations don't have anything to do with FTZ, so we skip that
1573  // argument.
1574  SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
1575 
1576  SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
1577  : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
1578 
1579  SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
1580  : Special(Special), FtzRequirement(FtzReq) {}
1581  };
1582 
1583  // Try to generate a SimplifyAction describing how to replace our
1584  // IntrinsicInstr with target-generic LLVM IR.
1585  const SimplifyAction Action = [II]() -> SimplifyAction {
1586  switch (II->getIntrinsicID()) {
1587 
1588  // NVVM intrinsics that map directly to LLVM intrinsics.
1589  case Intrinsic::nvvm_ceil_d:
1590  return {Intrinsic::ceil, FTZ_Any};
1591  case Intrinsic::nvvm_ceil_f:
1592  return {Intrinsic::ceil, FTZ_MustBeOff};
1593  case Intrinsic::nvvm_ceil_ftz_f:
1594  return {Intrinsic::ceil, FTZ_MustBeOn};
1595  case Intrinsic::nvvm_fabs_d:
1596  return {Intrinsic::fabs, FTZ_Any};
1597  case Intrinsic::nvvm_fabs_f:
1598  return {Intrinsic::fabs, FTZ_MustBeOff};
1599  case Intrinsic::nvvm_fabs_ftz_f:
1600  return {Intrinsic::fabs, FTZ_MustBeOn};
1601  case Intrinsic::nvvm_floor_d:
1602  return {Intrinsic::floor, FTZ_Any};
1603  case Intrinsic::nvvm_floor_f:
1604  return {Intrinsic::floor, FTZ_MustBeOff};
1605  case Intrinsic::nvvm_floor_ftz_f:
1606  return {Intrinsic::floor, FTZ_MustBeOn};
1607  case Intrinsic::nvvm_fma_rn_d:
1608  return {Intrinsic::fma, FTZ_Any};
1609  case Intrinsic::nvvm_fma_rn_f:
1610  return {Intrinsic::fma, FTZ_MustBeOff};
1611  case Intrinsic::nvvm_fma_rn_ftz_f:
1612  return {Intrinsic::fma, FTZ_MustBeOn};
1613  case Intrinsic::nvvm_fmax_d:
1614  return {Intrinsic::maxnum, FTZ_Any};
1615  case Intrinsic::nvvm_fmax_f:
1616  return {Intrinsic::maxnum, FTZ_MustBeOff};
1617  case Intrinsic::nvvm_fmax_ftz_f:
1618  return {Intrinsic::maxnum, FTZ_MustBeOn};
1619  case Intrinsic::nvvm_fmin_d:
1620  return {Intrinsic::minnum, FTZ_Any};
1621  case Intrinsic::nvvm_fmin_f:
1622  return {Intrinsic::minnum, FTZ_MustBeOff};
1623  case Intrinsic::nvvm_fmin_ftz_f:
1624  return {Intrinsic::minnum, FTZ_MustBeOn};
1625  case Intrinsic::nvvm_round_d:
1626  return {Intrinsic::round, FTZ_Any};
1627  case Intrinsic::nvvm_round_f:
1628  return {Intrinsic::round, FTZ_MustBeOff};
1629  case Intrinsic::nvvm_round_ftz_f:
1630  return {Intrinsic::round, FTZ_MustBeOn};
1631  case Intrinsic::nvvm_sqrt_rn_d:
1632  return {Intrinsic::sqrt, FTZ_Any};
1633  case Intrinsic::nvvm_sqrt_f:
1634  // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
1635  // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
1636  // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
1637  // the versions with explicit ftz-ness.
1638  return {Intrinsic::sqrt, FTZ_Any};
1639  case Intrinsic::nvvm_sqrt_rn_f:
1640  return {Intrinsic::sqrt, FTZ_MustBeOff};
1641  case Intrinsic::nvvm_sqrt_rn_ftz_f:
1642  return {Intrinsic::sqrt, FTZ_MustBeOn};
1643  case Intrinsic::nvvm_trunc_d:
1644  return {Intrinsic::trunc, FTZ_Any};
1645  case Intrinsic::nvvm_trunc_f:
1646  return {Intrinsic::trunc, FTZ_MustBeOff};
1647  case Intrinsic::nvvm_trunc_ftz_f:
1648  return {Intrinsic::trunc, FTZ_MustBeOn};
1649 
1650  // NVVM intrinsics that map to LLVM cast operations.
1651  //
1652  // Note that llvm's target-generic conversion operators correspond to the rz
1653  // (round to zero) versions of the nvvm conversion intrinsics, even though
1654  // almost everything else here uses the rn (round-to-nearest-even) nvvm ops.
1655  case Intrinsic::nvvm_d2i_rz:
1656  case Intrinsic::nvvm_f2i_rz:
1657  case Intrinsic::nvvm_d2ll_rz:
1658  case Intrinsic::nvvm_f2ll_rz:
1659  return {Instruction::FPToSI};
1660  case Intrinsic::nvvm_d2ui_rz:
1661  case Intrinsic::nvvm_f2ui_rz:
1662  case Intrinsic::nvvm_d2ull_rz:
1663  case Intrinsic::nvvm_f2ull_rz:
1664  return {Instruction::FPToUI};
1665  case Intrinsic::nvvm_i2d_rz:
1666  case Intrinsic::nvvm_i2f_rz:
1667  case Intrinsic::nvvm_ll2d_rz:
1668  case Intrinsic::nvvm_ll2f_rz:
1669  return {Instruction::SIToFP};
1670  case Intrinsic::nvvm_ui2d_rz:
1671  case Intrinsic::nvvm_ui2f_rz:
1672  case Intrinsic::nvvm_ull2d_rz:
1673  case Intrinsic::nvvm_ull2f_rz:
1674  return {Instruction::UIToFP};
1675 
1676  // NVVM intrinsics that map to LLVM binary ops.
1677  case Intrinsic::nvvm_add_rn_d:
1678  return {Instruction::FAdd, FTZ_Any};
1679  case Intrinsic::nvvm_add_rn_f:
1680  return {Instruction::FAdd, FTZ_MustBeOff};
1681  case Intrinsic::nvvm_add_rn_ftz_f:
1682  return {Instruction::FAdd, FTZ_MustBeOn};
1683  case Intrinsic::nvvm_mul_rn_d:
1684  return {Instruction::FMul, FTZ_Any};
1685  case Intrinsic::nvvm_mul_rn_f:
1686  return {Instruction::FMul, FTZ_MustBeOff};
1687  case Intrinsic::nvvm_mul_rn_ftz_f:
1688  return {Instruction::FMul, FTZ_MustBeOn};
1689  case Intrinsic::nvvm_div_rn_d:
1690  return {Instruction::FDiv, FTZ_Any};
1691  case Intrinsic::nvvm_div_rn_f:
1692  return {Instruction::FDiv, FTZ_MustBeOff};
1693  case Intrinsic::nvvm_div_rn_ftz_f:
1694  return {Instruction::FDiv, FTZ_MustBeOn};
1695 
1696  // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
1697  // need special handling.
1698  //
1699  // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
1700  // as well.
1701  case Intrinsic::nvvm_rcp_rn_d:
1702  return {SPC_Reciprocal, FTZ_Any};
1703  case Intrinsic::nvvm_rcp_rn_f:
1704  return {SPC_Reciprocal, FTZ_MustBeOff};
1705  case Intrinsic::nvvm_rcp_rn_ftz_f:
1706  return {SPC_Reciprocal, FTZ_MustBeOn};
1707 
1708  // We do not currently simplify intrinsics that give an approximate answer.
1709  // These include:
1710  //
1711  // - nvvm_cos_approx_{f,ftz_f}
1712  // - nvvm_ex2_approx_{d,f,ftz_f}
1713  // - nvvm_lg2_approx_{d,f,ftz_f}
1714  // - nvvm_sin_approx_{f,ftz_f}
1715  // - nvvm_sqrt_approx_{f,ftz_f}
1716  // - nvvm_rsqrt_approx_{d,f,ftz_f}
1717  // - nvvm_div_approx_{ftz_d,ftz_f,f}
1718  // - nvvm_rcp_approx_ftz_d
1719  //
1720  // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
1721  // means that fastmath is enabled in the intrinsic. Unfortunately only
1722  // binary operators (currently) have a fastmath bit in SelectionDAG, so this
1723  // information gets lost and we can't select on it.
1724  //
1725  // TODO: div and rcp are lowered to a binary op, so these we could in theory
1726  // lower them to "fast fdiv".
1727 
1728  default:
1729  return {};
1730  }
1731  }();
1732 
1733  // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
1734  // can bail out now. (Notice that in the case that IID is not an NVVM
1735  // intrinsic, we don't have to look up any module metadata, as
1736  // FtzRequirementTy will be FTZ_Any.)
1737  if (Action.FtzRequirement != FTZ_Any) {
1738  bool FtzEnabled =
1739  II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
1740  "true";
1741 
1742  if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
1743  return nullptr;
1744  }
1745 
1746  // Simplify to target-generic intrinsic.
1747  if (Action.IID) {
1748  SmallVector<Value *, 4> Args(II->arg_operands());
1749  // All the target-generic intrinsics currently of interest to us have one
1750  // type argument, equal to that of the nvvm intrinsic's argument.
1751  Type *Tys[] = {II->getArgOperand(0)->getType()};
1752  return CallInst::Create(
1753  Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
1754  }
1755 
1756  // Simplify to target-generic binary op.
1757  if (Action.BinaryOp)
1758  return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
1759  II->getArgOperand(1), II->getName());
1760 
1761  // Simplify to target-generic cast op.
1762  if (Action.CastOp)
1763  return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
1764  II->getName());
1765 
1766  // All that's left are the special cases.
1767  if (!Action.Special)
1768  return nullptr;
1769 
1770  switch (*Action.Special) {
1771  case SPC_Reciprocal:
1772  // Simplify reciprocal.
1773  return BinaryOperator::Create(
1774  Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
1775  II->getArgOperand(0), II->getName());
1776  }
1777  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
1778 }
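// Illustrative sketch of the rewrites above (the IR shown is an assumption,
// not taken from the original file):
//
//   %r = call double @llvm.nvvm.sqrt.rn.d(double %x)
//     -> %r = call double @llvm.sqrt.f64(double %x)      ; Action.IID path
//   %r = call double @llvm.nvvm.rcp.rn.d(double %x)
//     -> %r = fdiv double 1.000000e+00, %x               ; SPC_Reciprocal path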
1779 
1780 Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
1781  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
1782  return nullptr;
1783 }
1784 
1785 Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
1786  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
1787  return nullptr;
1788 }
1789 
1790 /// CallInst simplification. This mostly only handles folding of intrinsic
1791 /// instructions. For normal calls, it allows visitCallSite to do the heavy
1792 /// lifting.
1793 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
1794  auto Args = CI.arg_operands();
1795  if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(),
1796  Args.end(), SQ.getWithInstruction(&CI)))
1797  return replaceInstUsesWith(CI, V);
1798 
1799  if (isFreeCall(&CI, &TLI))
1800  return visitFree(CI);
1801 
1802  // If the caller function is nounwind, mark the call as nounwind, even if the
1803  // callee isn't.
1804  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1805  CI.setDoesNotThrow();
1806  return &CI;
1807  }
1808 
1809  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1810  if (!II) return visitCallSite(&CI);
1811 
1812  // Intrinsics cannot occur in an invoke, so handle them here instead of in
1813  // visitCallSite.
1814  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
1815  bool Changed = false;
1816 
1817  // memmove/cpy/set of zero bytes is a noop.
1818  if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
1819  if (NumBytes->isNullValue())
1820  return eraseInstFromFunction(CI);
1821 
1822  if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
1823  if (CI->getZExtValue() == 1) {
1824  // Replace the instruction with just byte operations. We would
1825  // transform other cases to loads/stores, but we don't know if
1826  // alignment is sufficient.
1827  }
1828  }
1829 
1830  // No other transformations apply to volatile transfers.
1831  if (MI->isVolatile())
1832  return nullptr;
1833 
1834  // If we have a memmove and the source operation is a constant global,
1835  // then the source and dest pointers can't alias, so we can change this
1836  // into a call to memcpy.
1837  if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
1838  if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1839  if (GVSrc->isConstant()) {
1840  Module *M = CI.getModule();
1841  Intrinsic::ID MemCpyID = Intrinsic::memcpy;
1842  Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1843  CI.getArgOperand(1)->getType(),
1844  CI.getArgOperand(2)->getType() };
1845  CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
1846  Changed = true;
1847  }
1848  }
1849 
1850  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1851  // memmove(x,x,size) -> noop.
1852  if (MTI->getSource() == MTI->getDest())
1853  return eraseInstFromFunction(CI);
1854  }
1855 
1856  // If we can determine a pointer alignment that is bigger than currently
1857  // set, update the alignment.
1858  if (isa<MemTransferInst>(MI)) {
1859  if (Instruction *I = SimplifyMemTransfer(MI))
1860  return I;
1861  } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
1862  if (Instruction *I = SimplifyMemSet(MSI))
1863  return I;
1864  }
1865 
1866  if (Changed) return II;
1867  }
1868 
1869  if (auto *AMI = dyn_cast<ElementUnorderedAtomicMemCpyInst>(II)) {
1870  if (Constant *C = dyn_cast<Constant>(AMI->getLength()))
1871  if (C->isNullValue())
1872  return eraseInstFromFunction(*AMI);
1873 
1874  if (Instruction *I = SimplifyElementUnorderedAtomicMemCpy(AMI))
1875  return I;
1876  }
1877 
1878  if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
1879  return I;
1880 
1881  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
1882  unsigned DemandedWidth) {
1883  APInt UndefElts(Width, 0);
1884  APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
1885  return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
1886  };
1887 
1888  switch (II->getIntrinsicID()) {
1889  default: break;
1890  case Intrinsic::objectsize:
1891  if (ConstantInt *N =
1892  lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
1893  return replaceInstUsesWith(CI, N);
1894  return nullptr;
1895 
1896  case Intrinsic::bswap: {
1897  Value *IIOperand = II->getArgOperand(0);
1898  Value *X = nullptr;
1899 
1900  // TODO should this be in InstSimplify?
1901  // bswap(bswap(x)) -> x
1902  if (match(IIOperand, m_BSwap(m_Value(X))))
1903  return replaceInstUsesWith(CI, X);
1904 
1905  // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
1906  if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
1907  unsigned C = X->getType()->getPrimitiveSizeInBits() -
1908  IIOperand->getType()->getPrimitiveSizeInBits();
1909  Value *CV = ConstantInt::get(X->getType(), C);
1910  Value *V = Builder.CreateLShr(X, CV);
1911  return new TruncInst(V, IIOperand->getType());
1912  }
1913  break;
1914  }
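// Byte-level illustration (not from the original source) of the second fold,
// with i32 %x = 0xAABBCCDD: bswap(%x) = 0xDDCCBBAA, trunc to i16 = 0xBBAA,
// bswap again = 0xAABB; equivalently trunc(lshr(%x, 16)) = 0xAABB, where the
// shift amount C = 32 - 16 = 16 bits.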
1915 
1916  case Intrinsic::bitreverse: {
1917  Value *IIOperand = II->getArgOperand(0);
1918  Value *X = nullptr;
1919 
1920  // TODO should this be in InstSimplify?
1921  // bitreverse(bitreverse(x)) -> x
1922  if (match(IIOperand, m_BitReverse(m_Value(X))))
1923  return replaceInstUsesWith(CI, X);
1924  break;
1925  }
1926 
1927  case Intrinsic::masked_load:
1928  if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
1929  return replaceInstUsesWith(CI, SimplifiedMaskedOp);
1930  break;
1931  case Intrinsic::masked_store:
1932  return simplifyMaskedStore(*II, *this);
1933  case Intrinsic::masked_gather:
1934  return simplifyMaskedGather(*II, *this);
1935  case Intrinsic::masked_scatter:
1936  return simplifyMaskedScatter(*II, *this);
1937 
1938  case Intrinsic::powi:
1939  if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
1940  // powi(x, 0) -> 1.0
1941  if (Power->isZero())
1942  return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
1943  // powi(x, 1) -> x
1944  if (Power->isOne())
1945  return replaceInstUsesWith(CI, II->getArgOperand(0));
1946  // powi(x, -1) -> 1/x
1947  if (Power->isMinusOne())
1948  return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
1949  II->getArgOperand(0));
1950  }
1951  break;
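// Hedged IR illustration of the powi folds above:
//   call double @llvm.powi.f64(double %x, i32 0)   ; -> 1.0
//   call double @llvm.powi.f64(double %x, i32 1)   ; -> %x
//   call double @llvm.powi.f64(double %x, i32 -1)  ; -> fdiv double 1.0, %x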
1952 
1953  case Intrinsic::cttz:
1954  case Intrinsic::ctlz:
1955  if (auto *I = foldCttzCtlz(*II, *this))
1956  return I;
1957  break;
1958 
1959  case Intrinsic::ctpop:
1960  if (auto *I = foldCtpop(*II, *this))
1961  return I;
1962  break;
1963 
1964  case Intrinsic::uadd_with_overflow:
1965  case Intrinsic::sadd_with_overflow:
1966  case Intrinsic::umul_with_overflow:
1967  case Intrinsic::smul_with_overflow:
1968  if (isa<Constant>(II->getArgOperand(0)) &&
1969  !isa<Constant>(II->getArgOperand(1))) {
1970  // Canonicalize constants into the RHS.
1971  Value *LHS = II->getArgOperand(0);
1972  II->setArgOperand(0, II->getArgOperand(1));
1973  II->setArgOperand(1, LHS);
1974  return II;
1975  }
1976  LLVM_FALLTHROUGH;
1977 
1978  case Intrinsic::usub_with_overflow:
1979  case Intrinsic::ssub_with_overflow: {
1980  OverflowCheckFlavor OCF =
1981  IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
1982  assert(OCF != OCF_INVALID && "unexpected!");
1983 
1984  Value *OperationResult = nullptr;
1985  Constant *OverflowResult = nullptr;
1986  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
1987  *II, OperationResult, OverflowResult))
1988  return CreateOverflowTuple(II, OperationResult, OverflowResult);
1989 
1990  break;
1991  }
1992 
1993  case Intrinsic::minnum:
1994  case Intrinsic::maxnum: {
1995  Value *Arg0 = II->getArgOperand(0);
1996  Value *Arg1 = II->getArgOperand(1);
1997  // Canonicalize constants to the RHS.
1998  if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
1999  II->setArgOperand(0, Arg1);
2000  II->setArgOperand(1, Arg0);
2001  return II;
2002  }
2003  if (Value *V = simplifyMinnumMaxnum(*II))
2004  return replaceInstUsesWith(*II, V);
2005  break;
2006  }
2007  case Intrinsic::fmuladd: {
2008  // Canonicalize fast fmuladd to the separate fmul + fadd.
2009  if (II->hasUnsafeAlgebra()) {
2010  BuilderTy::FastMathFlagGuard Guard(Builder);
2011  Builder.setFastMathFlags(II->getFastMathFlags());
2012  Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
2013  II->getArgOperand(1));
2014  Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
2015  Add->takeName(II);
2016  return replaceInstUsesWith(*II, Add);
2017  }
2018 
2019  LLVM_FALLTHROUGH;
2020  }
2021  case Intrinsic::fma: {
2022  Value *Src0 = II->getArgOperand(0);
2023  Value *Src1 = II->getArgOperand(1);
2024 
2025  // Canonicalize constants into the RHS.
2026  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
2027  II->setArgOperand(0, Src1);
2028  II->setArgOperand(1, Src0);
2029  std::swap(Src0, Src1);
2030  }
2031 
2032  Value *LHS = nullptr;
2033  Value *RHS = nullptr;
2034 
2035  // fma fneg(x), fneg(y), z -> fma x, y, z
2036  if (match(Src0, m_FNeg(m_Value(LHS))) &&
2037  match(Src1, m_FNeg(m_Value(RHS)))) {
2038  II->setArgOperand(0, LHS);
2039  II->setArgOperand(1, RHS);
2040  return II;
2041  }
2042 
2043  // fma fabs(x), fabs(x), z -> fma x, x, z
2044  if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
2045  match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
2046  II->setArgOperand(0, LHS);
2047  II->setArgOperand(1, RHS);
2048  return II;
2049  }
2050 
2051  // fma x, 1, z -> fadd x, z
2052  if (match(Src1, m_FPOne())) {
2053  Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
2054  RI->copyFastMathFlags(II);
2055  return RI;
2056  }
2057 
2058  break;
2059  }
2060  case Intrinsic::fabs: {
2061  Value *Cond;
2062  Constant *LHS, *RHS;
2063  if (match(II->getArgOperand(0),
2064  m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
2065  CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
2066  CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
2067  return SelectInst::Create(Cond, Call0, Call1);
2068  }
2069 
2070  LLVM_FALLTHROUGH;
2071  }
2072  case Intrinsic::ceil:
2073  case Intrinsic::floor:
2074  case Intrinsic::round:
2075  case Intrinsic::nearbyint:
2076  case Intrinsic::rint:
2077  case Intrinsic::trunc: {
2078  Value *ExtSrc;
2079  if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) &&
2080  II->getArgOperand(0)->hasOneUse()) {
2081  // fabs (fpext x) -> fpext (fabs x)
2082  Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(),
2083  { ExtSrc->getType() });
2084  CallInst *NewFabs = Builder.CreateCall(F, ExtSrc);
2085  NewFabs->copyFastMathFlags(II);
2086  NewFabs->takeName(II);
2087  return new FPExtInst(NewFabs, II->getType());
2088  }
2089 
2090  break;
2091  }
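// Illustration (assumed IR, not from the original source): with a single-use
// fpext feeding llvm.trunc,
//
//   %e = fpext half %h to float
//   %t = call float @llvm.trunc.f32(float %e)
//
// becomes a narrower intrinsic followed by the extension:
//
//   %n = call half @llvm.trunc.f16(half %h)
//   %t = fpext half %n to float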
2092  case Intrinsic::cos:
2093  case Intrinsic::amdgcn_cos: {
2094  Value *SrcSrc;
2095  Value *Src = II->getArgOperand(0);
2096  if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
2097  match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
2098  // cos(-x) -> cos(x)
2099  // cos(fabs(x)) -> cos(x)
2100  II->setArgOperand(0, SrcSrc);
2101  return II;
2102  }
2103 
2104  break;
2105  }
2106  case Intrinsic::ppc_altivec_lvx:
2107  case Intrinsic::ppc_altivec_lvxl:
2108  // Turn PPC lvx -> load if the pointer is known aligned.
2109  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2110  &DT) >= 16) {
2111  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2112  PointerType::getUnqual(II->getType()));
2113  return new LoadInst(Ptr);
2114  }
2115  break;
2116  case Intrinsic::ppc_vsx_lxvw4x:
2117  case Intrinsic::ppc_vsx_lxvd2x: {
2118  // Turn PPC VSX loads into normal loads.
2119  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2120  PointerType::getUnqual(II->getType()));
2121  return new LoadInst(Ptr, Twine(""), false, 1);
2122  }
2123  case Intrinsic::ppc_altivec_stvx:
2124  case Intrinsic::ppc_altivec_stvxl:
2125  // Turn stvx -> store if the pointer is known aligned.
2126  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2127  &DT) >= 16) {
2128  Type *OpPtrTy =
2129  PointerType::getUnqual(II->getArgOperand(0)->getType());
2130  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2131  return new StoreInst(II->getArgOperand(0), Ptr);
2132  }
2133  break;
2134  case Intrinsic::ppc_vsx_stxvw4x:
2135  case Intrinsic::ppc_vsx_stxvd2x: {
2136  // Turn PPC VSX stores into normal stores.
2137  Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
2138  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2139  return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
2140  }
2141  case Intrinsic::ppc_qpx_qvlfs:
2142  // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
2143  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2144  &DT) >= 16) {
2145  Type *VTy = VectorType::get(Builder.getFloatTy(),
2146  II->getType()->getVectorNumElements());
2147  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2148  PointerType::getUnqual(VTy));
2149  Value *Load = Builder.CreateLoad(Ptr);
2150  return new FPExtInst(Load, II->getType());
2151  }
2152  break;
2153  case Intrinsic::ppc_qpx_qvlfd:
2154  // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
2155  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
2156  &DT) >= 32) {
2157  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2158  PointerType::getUnqual(II->getType()));
2159  return new LoadInst(Ptr);
2160  }
2161  break;
2162  case Intrinsic::ppc_qpx_qvstfs:
2163  // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
2164  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2165  &DT) >= 16) {
2166  Type *VTy = VectorType::get(Builder.getFloatTy(),
2167  II->getArgOperand(0)->getType()->getVectorNumElements());
2168  Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);
2169  Type *OpPtrTy = PointerType::getUnqual(VTy);
2170  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2171  return new StoreInst(TOp, Ptr);
2172  }
2173  break;
2174  case Intrinsic::ppc_qpx_qvstfd:
2175  // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
2176  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
2177  &DT) >= 32) {
2178  Type *OpPtrTy =
2179  PointerType::getUnqual(II->getArgOperand(0)->getType());
2180  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2181  return new StoreInst(II->getArgOperand(0), Ptr);
2182  }
2183  break;
2184 
2185  case Intrinsic::x86_bmi_bextr_32:
2186  case Intrinsic::x86_bmi_bextr_64:
2187  case Intrinsic::x86_tbm_bextri_u32:
2188  case Intrinsic::x86_tbm_bextri_u64:
2189  // If the RHS is a constant we can try some simplifications.
2190  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2191  uint64_t Shift = C->getZExtValue();
2192  uint64_t Length = (Shift >> 8) & 0xff;
2193  Shift &= 0xff;
2194  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2195  // If the length is 0 or the shift is out of range, replace with zero.
2196  if (Length == 0 || Shift >= BitWidth)
2197  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2198  // If the LHS is also a constant, we can completely constant fold this.
2199  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2200  uint64_t Result = InC->getZExtValue() >> Shift;
2201  if (Length > BitWidth)
2202  Length = BitWidth;
2203  Result &= maskTrailingOnes<uint64_t>(Length);
2204  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2205  }
2206  // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
2207  // are only masking bits that a shift already cleared?
2208  }
2209  break;
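// Worked example (illustration only): a control word of 0x0804 encodes
// Length = 8, Shift = 4, so bextr(0x12345678, 0x0804) constant-folds to
// (0x12345678 >> 4) & 0xff = 0x67.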
2210 
2211  case Intrinsic::x86_bmi_bzhi_32:
2212  case Intrinsic::x86_bmi_bzhi_64:
2213  // If the RHS is a constant we can try some simplifications.
2214  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2215  uint64_t Index = C->getZExtValue() & 0xff;
2216  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2217  if (Index >= BitWidth)
2218  return replaceInstUsesWith(CI, II->getArgOperand(0));
2219  if (Index == 0)
2220  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2221  // If the LHS is also a constant, we can completely constant fold this.
2222  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2223  uint64_t Result = InC->getZExtValue();
2224  Result &= maskTrailingOnes<uint64_t>(Index);
2225  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2226  }
2227  // TODO should we convert this to an AND if the RHS is constant?
2228  }
2229  break;
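// Worked example (illustration only): bzhi(0x12345678, 8) keeps only the low
// 8 bits and constant-folds to 0x78; an index of 0 yields 0, and an index >=
// the bit width returns the first operand unchanged.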
2230 
2231  case Intrinsic::x86_vcvtph2ps_128:
2232  case Intrinsic::x86_vcvtph2ps_256: {
2233  auto Arg = II->getArgOperand(0);
2234  auto ArgType = cast<VectorType>(Arg->getType());
2235  auto RetType = cast<VectorType>(II->getType());
2236  unsigned ArgWidth = ArgType->getNumElements();
2237  unsigned RetWidth = RetType->getNumElements();
2238  assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
2239  assert(ArgType->isIntOrIntVectorTy() &&
2240  ArgType->getScalarSizeInBits() == 16 &&
2241  "CVTPH2PS input type should be 16-bit integer vector");
2242  assert(RetType->getScalarType()->isFloatTy() &&
2243  "CVTPH2PS output type should be 32-bit float vector");
2244 
2245  // Constant folding: Convert to generic half to single conversion.
2246  if (isa<ConstantAggregateZero>(Arg))
2247  return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
2248 
2249  if (isa<ConstantDataVector>(Arg)) {
2250  auto VectorHalfAsShorts = Arg;
2251  if (RetWidth < ArgWidth) {
2252  SmallVector<uint32_t, 8> SubVecMask;
2253  for (unsigned i = 0; i != RetWidth; ++i)
2254  SubVecMask.push_back((int)i);
2255  VectorHalfAsShorts = Builder.CreateShuffleVector(
2256  Arg, UndefValue::get(ArgType), SubVecMask);
2257  }
2258 
2259  auto VectorHalfType =
2260  VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
2261  auto VectorHalfs =
2262  Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType);
2263  auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType);
2264  return replaceInstUsesWith(*II, VectorFloats);
2265  }
2266 
2267  // We only use the lowest lanes of the argument.
2268  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
2269  II->setArgOperand(0, V);
2270  return II;
2271  }
2272  break;
2273  }
2274 
2275  case Intrinsic::x86_sse_cvtss2si:
2276  case Intrinsic::x86_sse_cvtss2si64:
2277  case Intrinsic::x86_sse_cvttss2si:
2278  case Intrinsic::x86_sse_cvttss2si64:
2279  case Intrinsic::x86_sse2_cvtsd2si:
2280  case Intrinsic::x86_sse2_cvtsd2si64:
2281  case Intrinsic::x86_sse2_cvttsd2si:
2282  case Intrinsic::x86_sse2_cvttsd2si64:
2283  case Intrinsic::x86_avx512_vcvtss2si32:
2284  case Intrinsic::x86_avx512_vcvtss2si64:
2285  case Intrinsic::x86_avx512_vcvtss2usi32:
2286  case Intrinsic::x86_avx512_vcvtss2usi64:
2287  case Intrinsic::x86_avx512_vcvtsd2si32:
2288  case Intrinsic::x86_avx512_vcvtsd2si64:
2289  case Intrinsic::x86_avx512_vcvtsd2usi32:
2290  case Intrinsic::x86_avx512_vcvtsd2usi64:
2291  case Intrinsic::x86_avx512_cvttss2si:
2292  case Intrinsic::x86_avx512_cvttss2si64:
2293  case Intrinsic::x86_avx512_cvttss2usi:
2294  case Intrinsic::x86_avx512_cvttss2usi64:
2295  case Intrinsic::x86_avx512_cvttsd2si:
2296  case Intrinsic::x86_avx512_cvttsd2si64:
2297  case Intrinsic::x86_avx512_cvttsd2usi:
2298  case Intrinsic::x86_avx512_cvttsd2usi64: {
2299  // These intrinsics only demand the 0th element of their input vectors. If
2300  // we can simplify the input based on that, do so now.
2301  Value *Arg = II->getArgOperand(0);
2302  unsigned VWidth = Arg->getType()->getVectorNumElements();
2303  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2304  II->setArgOperand(0, V);
2305  return II;
2306  }
2307  break;
2308  }
2309 
2310  case Intrinsic::x86_mmx_pmovmskb:
2311  case Intrinsic::x86_sse_movmsk_ps:
2312  case Intrinsic::x86_sse2_movmsk_pd:
2313  case Intrinsic::x86_sse2_pmovmskb_128:
2314  case Intrinsic::x86_avx_movmsk_pd_256:
2315  case Intrinsic::x86_avx_movmsk_ps_256:
2316  case Intrinsic::x86_avx2_pmovmskb: {
2317  if (Value *V = simplifyX86movmsk(*II))
2318  return replaceInstUsesWith(*II, V);
2319  break;
2320  }
2321 
2322  case Intrinsic::x86_sse_comieq_ss:
2323  case Intrinsic::x86_sse_comige_ss:
2324  case Intrinsic::x86_sse_comigt_ss:
2325  case Intrinsic::x86_sse_comile_ss:
2326  case Intrinsic::x86_sse_comilt_ss:
2327  case Intrinsic::x86_sse_comineq_ss:
2328  case Intrinsic::x86_sse_ucomieq_ss:
2329  case Intrinsic::x86_sse_ucomige_ss:
2330  case Intrinsic::x86_sse_ucomigt_ss:
2331  case Intrinsic::x86_sse_ucomile_ss:
2332  case Intrinsic::x86_sse_ucomilt_ss:
2333  case Intrinsic::x86_sse_ucomineq_ss:
2334  case Intrinsic::x86_sse2_comieq_sd:
2335  case Intrinsic::x86_sse2_comige_sd:
2336  case Intrinsic::x86_sse2_comigt_sd:
2337  case Intrinsic::x86_sse2_comile_sd:
2338  case Intrinsic::x86_sse2_comilt_sd:
2339  case Intrinsic::x86_sse2_comineq_sd:
2340  case Intrinsic::x86_sse2_ucomieq_sd:
2341  case Intrinsic::x86_sse2_ucomige_sd:
2342  case Intrinsic::x86_sse2_ucomigt_sd:
2343  case Intrinsic::x86_sse2_ucomile_sd:
2344  case Intrinsic::x86_sse2_ucomilt_sd:
2345  case Intrinsic::x86_sse2_ucomineq_sd:
2346  case Intrinsic::x86_avx512_vcomi_ss:
2347  case Intrinsic::x86_avx512_vcomi_sd:
2348  case Intrinsic::x86_avx512_mask_cmp_ss:
2349  case Intrinsic::x86_avx512_mask_cmp_sd: {
2350  // These intrinsics only demand the 0th element of their input vectors. If
2351  // we can simplify the input based on that, do so now.
2352  bool MadeChange = false;
2353  Value *Arg0 = II->getArgOperand(0);
2354  Value *Arg1 = II->getArgOperand(1);
2355  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2356  if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2357  II->setArgOperand(0, V);
2358  MadeChange = true;
2359  }
2360  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2361  II->setArgOperand(1, V);
2362  MadeChange = true;
2363  }
2364  if (MadeChange)
2365  return II;
2366  break;
2367  }
2368  case Intrinsic::x86_avx512_mask_cmp_pd_128:
2369  case Intrinsic::x86_avx512_mask_cmp_pd_256:
2370  case Intrinsic::x86_avx512_mask_cmp_pd_512:
2371  case Intrinsic::x86_avx512_mask_cmp_ps_128:
2372  case Intrinsic::x86_avx512_mask_cmp_ps_256:
2373  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
2374  // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
2375  Value *Arg0 = II->getArgOperand(0);
2376  Value *Arg1 = II->getArgOperand(1);
2377  bool Arg0IsZero = match(Arg0, m_Zero());
2378  if (Arg0IsZero)
2379  std::swap(Arg0, Arg1);
2380  Value *A, *B;
2381  // This fold requires only the NINF (no +/- infs) flag, since inf minus
2382  // inf is nan.
2383  // NSZ(No Signed Zeros) is not needed because zeros of any sign are
2384  // equal for both compares.
2385  // NNAN is not needed because nans compare the same for both compares.
2386  // The compare intrinsic uses the above assumptions and therefore
2387  // doesn't require additional flags.
2388  if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
2389  match(Arg1, m_Zero()) &&
2390  cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
2391  if (Arg0IsZero)
2392  std::swap(A, B);
2393  II->setArgOperand(0, A);
2394  II->setArgOperand(1, B);
2395  return II;
2396  }
2397  break;
2398  }
2399 
2400  case Intrinsic::x86_avx512_mask_add_ps_512:
2401  case Intrinsic::x86_avx512_mask_div_ps_512:
2402  case Intrinsic::x86_avx512_mask_mul_ps_512:
2403  case Intrinsic::x86_avx512_mask_sub_ps_512:
2404  case Intrinsic::x86_avx512_mask_add_pd_512:
2405  case Intrinsic::x86_avx512_mask_div_pd_512:
2406  case Intrinsic::x86_avx512_mask_mul_pd_512:
2407  case Intrinsic::x86_avx512_mask_sub_pd_512:
2408  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2409  // IR operations.
2410  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2411  if (R->getValue() == 4) {
2412  Value *Arg0 = II->getArgOperand(0);
2413  Value *Arg1 = II->getArgOperand(1);
2414 
2415  Value *V;
2416  switch (II->getIntrinsicID()) {
2417  default: llvm_unreachable("Case stmts out of sync!");
2418  case Intrinsic::x86_avx512_mask_add_ps_512:
2419  case Intrinsic::x86_avx512_mask_add_pd_512:
2420  V = Builder.CreateFAdd(Arg0, Arg1);
2421  break;
2422  case Intrinsic::x86_avx512_mask_sub_ps_512:
2423  case Intrinsic::x86_avx512_mask_sub_pd_512:
2424  V = Builder.CreateFSub(Arg0, Arg1);
2425  break;
2426  case Intrinsic::x86_avx512_mask_mul_ps_512:
2427  case Intrinsic::x86_avx512_mask_mul_pd_512:
2428  V = Builder.CreateFMul(Arg0, Arg1);
2429  break;
2430  case Intrinsic::x86_avx512_mask_div_ps_512:
2431  case Intrinsic::x86_avx512_mask_div_pd_512:
2432  V = Builder.CreateFDiv(Arg0, Arg1);
2433  break;
2434  }
2435 
2436  // Create a select for the masking.
2437  V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
2438  Builder);
2439  return replaceInstUsesWith(*II, V);
2440  }
2441  }
2442  break;
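// Hedged illustration: when the rounding-mode operand is CUR_DIRECTION (4),
//   @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b,
//                                    <16 x float> %src, i16 %k, i32 4)
// is rebuilt as a plain 'fadd <16 x float> %a, %b' whose result is then
// blended with %src under mask %k by emitX86MaskSelect. The sub/mul/div
// variants are handled the same way with fsub/fmul/fdiv.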
2443 
2444  case Intrinsic::x86_avx512_mask_add_ss_round:
2445  case Intrinsic::x86_avx512_mask_div_ss_round:
2446  case Intrinsic::x86_avx512_mask_mul_ss_round:
2447  case Intrinsic::x86_avx512_mask_sub_ss_round:
2448  case Intrinsic::x86_avx512_mask_add_sd_round:
2449  case Intrinsic::x86_avx512_mask_div_sd_round:
2450  case Intrinsic::x86_avx512_mask_mul_sd_round:
2451  case Intrinsic::x86_avx512_mask_sub_sd_round:
2452  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2453  // IR operations.
2454  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2455  if (R->getValue() == 4) {
2456  // Extract the element as scalars.
2457  Value *Arg0 = II->getArgOperand(0);
2458  Value *Arg1 = II->getArgOperand(1);
2459  Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
2460  Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
2461 
2462  Value *V;
2463  switch (II->getIntrinsicID()) {
2464  default: llvm_unreachable("Case stmts out of sync!");
2465  case Intrinsic::x86_avx512_mask_add_ss_round:
2466  case Intrinsic::x86_avx512_mask_add_sd_round:
2467  V = Builder.CreateFAdd(LHS, RHS);
2468  break;
2469  case Intrinsic::x86_avx512_mask_sub_ss_round:
2470  case Intrinsic::x86_avx512_mask_sub_sd_round:
2471  V = Builder.CreateFSub(LHS, RHS);
2472  break;
2473  case Intrinsic::x86_avx512_mask_mul_ss_round:
2474  case Intrinsic::x86_avx512_mask_mul_sd_round:
2475  V = Builder.CreateFMul(LHS, RHS);
2476  break;
2477  case Intrinsic::x86_avx512_mask_div_ss_round:
2478  case Intrinsic::x86_avx512_mask_div_sd_round:
2479  V = Builder.CreateFDiv(LHS, RHS);
2480  break;
2481  }
2482 
2483  // Handle the masking aspect of the intrinsic.
2484  Value *Mask = II->getArgOperand(3);
2485  auto *C = dyn_cast<ConstantInt>(Mask);
2486  // We don't need a select if we know the mask bit is a 1.
2487  if (!C || !C->getValue()[0]) {
2488  // Cast the mask to an i1 vector and then extract the lowest element.
2489  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
2490  cast<IntegerType>(Mask->getType())->getBitWidth());
2491  Mask = Builder.CreateBitCast(Mask, MaskTy);
2492  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2493  // Extract the lowest element from the passthru operand.
2494  Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2),
2495  (uint64_t)0);
2496  V = Builder.CreateSelect(Mask, V, Passthru);
2497  }
2498 
2499  // Insert the result back into the original argument 0.
2500  V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
2501 
2502  return replaceInstUsesWith(*II, V);
2503  }
2504  }
2505  LLVM_FALLTHROUGH;
2506 
2507  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
2508  case Intrinsic::x86_avx512_mask_max_ss_round:
2509  case Intrinsic::x86_avx512_mask_min_ss_round:
2510  case Intrinsic::x86_avx512_mask_max_sd_round:
2511  case Intrinsic::x86_avx512_mask_min_sd_round:
2512  case Intrinsic::x86_avx512_mask_vfmadd_ss:
2513  case Intrinsic::x86_avx512_mask_vfmadd_sd:
2514  case Intrinsic::x86_avx512_maskz_vfmadd_ss:
2515  case Intrinsic::x86_avx512_maskz_vfmadd_sd:
2516  case Intrinsic::x86_avx512_mask3_vfmadd_ss:
2517  case Intrinsic::x86_avx512_mask3_vfmadd_sd:
2518  case Intrinsic::x86_avx512_mask3_vfmsub_ss:
2519  case Intrinsic::x86_avx512_mask3_vfmsub_sd:
2520  case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
2521  case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
2522  case Intrinsic::x86_fma_vfmadd_ss:
2523  case Intrinsic::x86_fma_vfmsub_ss:
2524  case Intrinsic::x86_fma_vfnmadd_ss:
2525  case Intrinsic::x86_fma_vfnmsub_ss:
2526  case Intrinsic::x86_fma_vfmadd_sd:
2527  case Intrinsic::x86_fma_vfmsub_sd:
2528  case Intrinsic::x86_fma_vfnmadd_sd:
2529  case Intrinsic::x86_fma_vfnmsub_sd:
2530  case Intrinsic::x86_sse_cmp_ss:
2531  case Intrinsic::x86_sse_min_ss:
2532  case Intrinsic::x86_sse_max_ss:
2533  case Intrinsic::x86_sse2_cmp_sd:
2534  case Intrinsic::x86_sse2_min_sd:
2535  case Intrinsic::x86_sse2_max_sd:
2536  case Intrinsic::x86_sse41_round_ss:
2537  case Intrinsic::x86_sse41_round_sd:
2538  case Intrinsic::x86_xop_vfrcz_ss:
2539  case Intrinsic::x86_xop_vfrcz_sd: {
2540  unsigned VWidth = II->getType()->getVectorNumElements();
2541  APInt UndefElts(VWidth, 0);
2542  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2543  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2544  if (V != II)
2545  return replaceInstUsesWith(*II, V);
2546  return II;
2547  }
2548  break;
2549  }
2550 
2551  // Constant fold ashr( <A x Bi>, Ci ).
2552  // Constant fold lshr( <A x Bi>, Ci ).
2553  // Constant fold shl( <A x Bi>, Ci ).
2554  case Intrinsic::x86_sse2_psrai_d:
2555  case Intrinsic::x86_sse2_psrai_w:
2556  case Intrinsic::x86_avx2_psrai_d:
2557  case Intrinsic::x86_avx2_psrai_w:
2558  case Intrinsic::x86_avx512_psrai_q_128:
2559  case Intrinsic::x86_avx512_psrai_q_256:
2560  case Intrinsic::x86_avx512_psrai_d_512:
2561  case Intrinsic::x86_avx512_psrai_q_512:
2562  case Intrinsic::x86_avx512_psrai_w_512:
2563  case Intrinsic::x86_sse2_psrli_d:
2564  case Intrinsic::x86_sse2_psrli_q:
2565  case Intrinsic::x86_sse2_psrli_w:
2566  case Intrinsic::x86_avx2_psrli_d:
2567  case Intrinsic::x86_avx2_psrli_q:
2568  case Intrinsic::x86_avx2_psrli_w:
2569  case Intrinsic::x86_avx512_psrli_d_512:
2570  case Intrinsic::x86_avx512_psrli_q_512:
2571  case Intrinsic::x86_avx512_psrli_w_512:
2572  case Intrinsic::x86_sse2_pslli_d:
2573  case Intrinsic::x86_sse2_pslli_q:
2574  case Intrinsic::x86_sse2_pslli_w:
2575  case Intrinsic::x86_avx2_pslli_d:
2576  case Intrinsic::x86_avx2_pslli_q:
2577  case Intrinsic::x86_avx2_pslli_w:
2578  case Intrinsic::x86_avx512_pslli_d_512:
2579  case Intrinsic::x86_avx512_pslli_q_512:
2580  case Intrinsic::x86_avx512_pslli_w_512:
2581  if (Value *V = simplifyX86immShift(*II, Builder))
2582  return replaceInstUsesWith(*II, V);
2583  break;
2584 
2585  case Intrinsic::x86_sse2_psra_d:
2586  case Intrinsic::x86_sse2_psra_w:
2587  case Intrinsic::x86_avx2_psra_d:
2588  case Intrinsic::x86_avx2_psra_w:
2589  case Intrinsic::x86_avx512_psra_q_128:
2590  case Intrinsic::x86_avx512_psra_q_256:
2591  case Intrinsic::x86_avx512_psra_d_512:
2592  case Intrinsic::x86_avx512_psra_q_512:
2593  case Intrinsic::x86_avx512_psra_w_512:
2594  case Intrinsic::x86_sse2_psrl_d:
2595  case Intrinsic::x86_sse2_psrl_q:
2596  case Intrinsic::x86_sse2_psrl_w:
2597  case Intrinsic::x86_avx2_psrl_d:
2598  case Intrinsic::x86_avx2_psrl_q:
2599  case Intrinsic::x86_avx2_psrl_w:
2600  case Intrinsic::x86_avx512_psrl_d_512:
2601  case Intrinsic::x86_avx512_psrl_q_512:
2602  case Intrinsic::x86_avx512_psrl_w_512:
2603  case Intrinsic::x86_sse2_psll_d:
2604  case Intrinsic::x86_sse2_psll_q:
2605  case Intrinsic::x86_sse2_psll_w:
2606  case Intrinsic::x86_avx2_psll_d:
2607  case Intrinsic::x86_avx2_psll_q:
2608  case Intrinsic::x86_avx2_psll_w:
2609  case Intrinsic::x86_avx512_psll_d_512:
2610  case Intrinsic::x86_avx512_psll_q_512:
2611  case Intrinsic::x86_avx512_psll_w_512: {
2612  if (Value *V = simplifyX86immShift(*II, Builder))
2613  return replaceInstUsesWith(*II, V);
2614 
2615  // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
2616  // operand to compute the shift amount.
2617  Value *Arg1 = II->getArgOperand(1);
2618  assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
2619  "Unexpected packed shift size");
2620  unsigned VWidth = Arg1->getType()->getVectorNumElements();
2621 
2622  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2623  II->setArgOperand(1, V);
2624  return II;
2625  }
2626  break;
2627  }
2628 
2629  case Intrinsic::x86_avx2_psllv_d:
2630  case Intrinsic::x86_avx2_psllv_d_256:
2631  case Intrinsic::x86_avx2_psllv_q:
2632  case Intrinsic::x86_avx2_psllv_q_256:
2633  case Intrinsic::x86_avx512_psllv_d_512:
2634  case Intrinsic::x86_avx512_psllv_q_512:
2635  case Intrinsic::x86_avx512_psllv_w_128:
2636  case Intrinsic::x86_avx512_psllv_w_256:
2637  case Intrinsic::x86_avx512_psllv_w_512:
2638  case Intrinsic::x86_avx2_psrav_d:
2639  case Intrinsic::x86_avx2_psrav_d_256:
2640  case Intrinsic::x86_avx512_psrav_q_128:
2641  case Intrinsic::x86_avx512_psrav_q_256:
2642  case Intrinsic::x86_avx512_psrav_d_512:
2643  case Intrinsic::x86_avx512_psrav_q_512:
2644  case Intrinsic::x86_avx512_psrav_w_128:
2645  case Intrinsic::x86_avx512_psrav_w_256:
2646  case Intrinsic::x86_avx512_psrav_w_512:
2647  case Intrinsic::x86_avx2_psrlv_d:
2648  case Intrinsic::x86_avx2_psrlv_d_256:
2649  case Intrinsic::x86_avx2_psrlv_q:
2650  case Intrinsic::x86_avx2_psrlv_q_256:
2651  case Intrinsic::x86_avx512_psrlv_d_512:
2652  case Intrinsic::x86_avx512_psrlv_q_512:
2653  case Intrinsic::x86_avx512_psrlv_w_128:
2654  case Intrinsic::x86_avx512_psrlv_w_256:
2655  case Intrinsic::x86_avx512_psrlv_w_512:
2656  if (Value *V = simplifyX86varShift(*II, Builder))
2657  return replaceInstUsesWith(*II, V);
2658  break;
2659 
2660  case Intrinsic::x86_sse2_pmulu_dq:
2661  case Intrinsic::x86_sse41_pmuldq:
2662  case Intrinsic::x86_avx2_pmul_dq:
2663  case Intrinsic::x86_avx2_pmulu_dq:
2664  case Intrinsic::x86_avx512_pmul_dq_512:
2665  case Intrinsic::x86_avx512_pmulu_dq_512: {
2666  if (Value *V = simplifyX86muldq(*II, Builder))
2667  return replaceInstUsesWith(*II, V);
2668 
2669  unsigned VWidth = II->getType()->getVectorNumElements();
2670  APInt UndefElts(VWidth, 0);
2671  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
2672  if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) {
2673  if (V != II)
2674  return replaceInstUsesWith(*II, V);
2675  return II;
2676  }
2677  break;
2678  }
2679 
2680  case Intrinsic::x86_sse2_packssdw_128:
2681  case Intrinsic::x86_sse2_packsswb_128:
2682  case Intrinsic::x86_avx2_packssdw:
2683  case Intrinsic::x86_avx2_packsswb:
2684  case Intrinsic::x86_avx512_packssdw_512:
2685  case Intrinsic::x86_avx512_packsswb_512:
2686  if (Value *V = simplifyX86pack(*II, true))
2687  return replaceInstUsesWith(*II, V);
2688  break;
2689 
2690  case Intrinsic::x86_sse2_packuswb_128:
2691  case Intrinsic::x86_sse41_packusdw:
2692  case Intrinsic::x86_avx2_packusdw:
2693  case Intrinsic::x86_avx2_packuswb:
2694  case Intrinsic::x86_avx512_packusdw_512:
2695  case Intrinsic::x86_avx512_packuswb_512:
2696  if (Value *V = simplifyX86pack(*II, false))
2697  return replaceInstUsesWith(*II, V);
2698  break;
2699 
2700  case Intrinsic::x86_pclmulqdq: {
2701  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
2702  unsigned Imm = C->getZExtValue();
2703 
2704  bool MadeChange = false;
2705  Value *Arg0 = II->getArgOperand(0);
2706  Value *Arg1 = II->getArgOperand(1);
2707  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2708  APInt DemandedElts(VWidth, 0);
2709 
2710  APInt UndefElts1(VWidth, 0);
2711  DemandedElts = (Imm & 0x01) ? 2 : 1;
2712  if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts,
2713  UndefElts1)) {
2714  II->setArgOperand(0, V);
2715  MadeChange = true;
2716  }
2717 
2718  APInt UndefElts2(VWidth, 0);
2719  DemandedElts = (Imm & 0x10) ? 2 : 1;
2720  if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts,
2721  UndefElts2)) {
2722  II->setArgOperand(1, V);
2723  MadeChange = true;
2724  }
2725 
2726  // If both input elements are undef, the result is undef.
2727  if (UndefElts1[(Imm & 0x01) ? 1 : 0] ||
2728  UndefElts2[(Imm & 0x10) ? 1 : 0])
2729  return replaceInstUsesWith(*II,
2730  ConstantAggregateZero::get(II->getType()));
2731 
2732  if (MadeChange)
2733  return II;
2734  }
2735  break;
2736  }
2737 
2738  case Intrinsic::x86_sse41_insertps:
2739  if (Value *V = simplifyX86insertps(*II, Builder))
2740  return replaceInstUsesWith(*II, V);
2741  break;
2742 
2743  case Intrinsic::x86_sse4a_extrq: {
2744  Value *Op0 = II->getArgOperand(0);
2745  Value *Op1 = II->getArgOperand(1);
2746  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2747  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2748  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2749  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2750  VWidth1 == 16 && "Unexpected operand sizes");
2751 
2752  // See if we're dealing with constant values.
2753  Constant *C1 = dyn_cast<Constant>(Op1);
2754  ConstantInt *CILength =
2755  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
2756  : nullptr;
2757  ConstantInt *CIIndex =
2758  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2759  : nullptr;
2760 
2761  // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
2762  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2763  return replaceInstUsesWith(*II, V);
2764 
2765  // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
2766  // operands and the lowest 16-bits of the second.
2767  bool MadeChange = false;
2768  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2769  II->setArgOperand(0, V);
2770  MadeChange = true;
2771  }
2772  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2773  II->setArgOperand(1, V);
2774  MadeChange = true;
2775  }
2776  if (MadeChange)
2777  return II;
2778  break;
2779  }
2780 
2781  case Intrinsic::x86_sse4a_extrqi: {
2782  // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
2783  // bits of the lower 64-bits. The upper 64-bits are undefined.
2784  Value *Op0 = II->getArgOperand(0);
2785  unsigned VWidth = Op0->getType()->getVectorNumElements();
2786  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2787  "Unexpected operand size");
2788 
2789  // See if we're dealing with constant values.
2790  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
2791  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
2792 
2793  // Attempt to simplify to a constant or shuffle vector.
2794  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2795  return replaceInstUsesWith(*II, V);
2796 
2797  // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
2798  // operand.
2799  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2800  II->setArgOperand(0, V);
2801  return II;
2802  }
2803  break;
2804  }
2805 
2806  case Intrinsic::x86_sse4a_insertq: {
2807  Value *Op0 = II->getArgOperand(0);
2808  Value *Op1 = II->getArgOperand(1);
2809  unsigned VWidth = Op0->getType()->getVectorNumElements();
2810  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2811  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2812  Op1->getType()->getVectorNumElements() == 2 &&
2813  "Unexpected operand size");
2814 
2815  // See if we're dealing with constant values.
2816  Constant *C1 = dyn_cast<Constant>(Op1);
2817  ConstantInt *CI11 =
2818  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2819  : nullptr;
2820 
2821  // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
2822  if (CI11) {
2823  const APInt &V11 = CI11->getValue();
2824  APInt Len = V11.zextOrTrunc(6);
2825  APInt Idx = V11.lshr(8).zextOrTrunc(6);
2826  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2827  return replaceInstUsesWith(*II, V);
2828  }
2829 
2830  // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
2831  // operand.
2832  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2833  II->setArgOperand(0, V);
2834  return II;
2835  }
2836  break;
2837  }
2838 
2839  case Intrinsic::x86_sse4a_insertqi: {
2840  // INSERTQI: Extract lowest Length bits from lower half of second source and
2841  // insert over first source starting at Index bit. The upper 64-bits are
2842  // undefined.
2843  Value *Op0 = II->getArgOperand(0);
2844  Value *Op1 = II->getArgOperand(1);
2845  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2846  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2847  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2848  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2849  VWidth1 == 2 && "Unexpected operand sizes");
2850 
2851  // See if we're dealing with constant values.
2852  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
2853  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
2854 
2855  // Attempt to simplify to a constant or shuffle vector.
2856  if (CILength && CIIndex) {
2857  APInt Len = CILength->getValue().zextOrTrunc(6);
2858  APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2859  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2860  return replaceInstUsesWith(*II, V);
2861  }
2862 
2863  // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
2864  // operands.
2865  bool MadeChange = false;
2866  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2867  II->setArgOperand(0, V);
2868  MadeChange = true;
2869  }
2870  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2871  II->setArgOperand(1, V);
2872  MadeChange = true;
2873  }
2874  if (MadeChange)
2875  return II;
2876  break;
2877  }
2878 
2879  case Intrinsic::x86_sse41_pblendvb:
2880  case Intrinsic::x86_sse41_blendvps:
2881  case Intrinsic::x86_sse41_blendvpd:
2882  case Intrinsic::x86_avx_blendv_ps_256:
2883  case Intrinsic::x86_avx_blendv_pd_256:
2884  case Intrinsic::x86_avx2_pblendvb: {
2885  // Convert blendv* to vector selects if the mask is constant.
2886  // This optimization is convoluted because the intrinsic is defined as
2887  // getting a vector of floats or doubles for the ps and pd versions.
2888  // FIXME: That should be changed.
2889 
2890  Value *Op0 = II->getArgOperand(0);
2891  Value *Op1 = II->getArgOperand(1);
2892  Value *Mask = II->getArgOperand(2);
2893 
2894  // fold (blend A, A, Mask) -> A
2895  if (Op0 == Op1)
2896  return replaceInstUsesWith(CI, Op0);
2897 
2898  // Zero Mask - select 1st argument.
2899  if (isa<ConstantAggregateZero>(Mask))
2900  return replaceInstUsesWith(CI, Op0);
2901 
2902  // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
2903  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2904  Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
2905  return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
2906  }
2907  break;
2908  }
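// Illustration (assumed IR, not from the original source): with a constant
// mask whose lanes have sign bits {1,0,1,0},
//
//   %r = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a, <4 x float> %b,
//                                                  <4 x float> <float -0.0, float 0.0, float -0.0, float 0.0>)
//
// becomes
//
//   %r = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>,
//               <4 x float> %b, <4 x float> %a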
2909 
2910  case Intrinsic::x86_ssse3_pshuf_b_128:
2911  case Intrinsic::x86_avx2_pshuf_b:
2912  case Intrinsic::x86_avx512_pshuf_b_512:
2913  if (Value *V = simplifyX86pshufb(*II, Builder))
2914  return replaceInstUsesWith(*II, V);
2915  break;
2916 
2917  case Intrinsic::x86_avx_vpermilvar_ps:
2918  case Intrinsic::x86_avx_vpermilvar_ps_256:
2919  case Intrinsic::x86_avx512_vpermilvar_ps_512:
2920  case Intrinsic::x86_avx_vpermilvar_pd:
2921  case Intrinsic::x86_avx_vpermilvar_pd_256:
2922  case Intrinsic::x86_avx512_vpermilvar_pd_512:
2923  if (Value *V = simplifyX86vpermilvar(*II, Builder))
2924  return replaceInstUsesWith(*II, V);
2925  break;
2926 
2927  case Intrinsic::x86_avx2_permd:
2928  case Intrinsic::x86_avx2_permps:
2929  if (Value *V = simplifyX86vpermv(*II, Builder))
2930  return replaceInstUsesWith(*II, V);
2931  break;
2932 
2933  case Intrinsic::x86_avx512_mask_permvar_df_256:
2934  case Intrinsic::x86_avx512_mask_permvar_df_512:
2935  case Intrinsic::x86_avx512_mask_permvar_di_256:
2936  case Intrinsic::x86_avx512_mask_permvar_di_512:
2937  case Intrinsic::x86_avx512_mask_permvar_hi_128:
2938  case Intrinsic::x86_avx512_mask_permvar_hi_256:
2939  case Intrinsic::x86_avx512_mask_permvar_hi_512:
2940  case Intrinsic::x86_avx512_mask_permvar_qi_128:
2941  case Intrinsic::x86_avx512_mask_permvar_qi_256:
2942  case Intrinsic::x86_avx512_mask_permvar_qi_512:
2943  case Intrinsic::x86_avx512_mask_permvar_sf_256:
2944  case Intrinsic::x86_avx512_mask_permvar_sf_512:
2945  case Intrinsic::x86_avx512_mask_permvar_si_256:
2946  case Intrinsic::x86_avx512_mask_permvar_si_512:
2947  if (Value *V = simplifyX86vpermv(*II, Builder)) {
2948  // We simplified the permuting, now create a select for the masking.
2949  V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
2950  Builder);
2951  return replaceInstUsesWith(*II, V);
2952  }
2953  break;
2954 
2955  case Intrinsic::x86_avx_maskload_ps:
2956  case Intrinsic::x86_avx_maskload_pd:
2957  case Intrinsic::x86_avx_maskload_ps_256:
2958  case Intrinsic::x86_avx_maskload_pd_256:
2959  case Intrinsic::x86_avx2_maskload_d:
2960  case Intrinsic::x86_avx2_maskload_q:
2961  case Intrinsic::x86_avx2_maskload_d_256:
2962  case Intrinsic::x86_avx2_maskload_q_256:
2963  if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
2964  return I;
2965  break;
2966 
2967  case Intrinsic::x86_sse2_maskmov_dqu:
2968  case Intrinsic::x86_avx_maskstore_ps:
2969  case Intrinsic::x86_avx_maskstore_pd:
2970  case Intrinsic::x86_avx_maskstore_ps_256:
2971  case Intrinsic::x86_avx_maskstore_pd_256:
2972  case Intrinsic::x86_avx2_maskstore_d:
2973  case Intrinsic::x86_avx2_maskstore_q:
2974  case Intrinsic::x86_avx2_maskstore_d_256:
2975  case Intrinsic::x86_avx2_maskstore_q_256:
2976  if (simplifyX86MaskedStore(*II, *this))
2977  return nullptr;
2978  break;
2979 
2980  case Intrinsic::x86_xop_vpcomb:
2981  case Intrinsic::x86_xop_vpcomd:
2982  case Intrinsic::x86_xop_vpcomq:
2983  case Intrinsic::x86_xop_vpcomw:
2984  if (Value *V = simplifyX86vpcom(*II, Builder, true))
2985  return replaceInstUsesWith(*II, V);
2986  break;
2987 
2988  case Intrinsic::x86_xop_vpcomub:
2989  case Intrinsic::x86_xop_vpcomud:
2990  case Intrinsic::x86_xop_vpcomuq:
2991  case Intrinsic::x86_xop_vpcomuw:
2992  if (Value *V = simplifyX86vpcom(*II, Builder, false))
2993  return replaceInstUsesWith(*II, V);
2994  break;
2995 
2996  case Intrinsic::ppc_altivec_vperm:
2997  // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
2998  // Note that ppc_altivec_vperm has a big-endian bias, so when creating
2999  // a vectorshuffle for little endian, we must undo the transformation
3000  // performed on vec_perm in altivec.h. That is, we must complement
3001  // the permutation mask with respect to 31 and reverse the order of
3002  // V1 and V2.
3003  if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
3004  assert(Mask->getType()->getVectorNumElements() == 16 &&
3005  "Bad type for intrinsic!");
3006 
3007  // Check that all of the elements are integer constants or undefs.
3008  bool AllEltsOk = true;
3009  for (unsigned i = 0; i != 16; ++i) {
3010  Constant *Elt = Mask->getAggregateElement(i);
3011  if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
3012  AllEltsOk = false;
3013  break;
3014  }
3015  }
3016 
3017  if (AllEltsOk) {
3018  // Cast the input vectors to byte vectors.
3019  Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0),
3020  Mask->getType());
3021  Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1),
3022  Mask->getType());
3023  Value *Result = UndefValue::get(Op0->getType());
3024 
3025  // Only extract each element once.
3026  Value *ExtractedElts[32];
3027  memset(ExtractedElts, 0, sizeof(ExtractedElts));
3028 
3029  for (unsigned i = 0; i != 16; ++i) {
3030  if (isa<UndefValue>(Mask->getAggregateElement(i)))
3031  continue;
3032  unsigned Idx =
3033  cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
3034  Idx &= 31; // Match the hardware behavior.
3035  if (DL.isLittleEndian())
3036  Idx = 31 - Idx;
3037 
3038  if (!ExtractedElts[Idx]) {
3039  Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
3040  Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
3041  ExtractedElts[Idx] =
3042  Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
3043  Builder.getInt32(Idx&15));
3044  }
3045 
3046  // Insert this value into the result vector.
3047  Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx],
3048  Builder.getInt32(i));
3049  }
3050  return CastInst::Create(Instruction::BitCast, Result, CI.getType());
3051  }
3052  }
3053  break;
3054 
3055  case Intrinsic::arm_neon_vld1:
3056  case Intrinsic::arm_neon_vld2:
3057  case Intrinsic::arm_neon_vld3:
3058  case Intrinsic::arm_neon_vld4:
3059  case Intrinsic::arm_neon_vld2lane:
3060  case Intrinsic::arm_neon_vld3lane:
3061  case Intrinsic::arm_neon_vld4lane:
3062  case Intrinsic::arm_neon_vst1:
3063  case Intrinsic::arm_neon_vst2:
3064  case Intrinsic::arm_neon_vst3:
3065  case Intrinsic::arm_neon_vst4:
3066  case Intrinsic::arm_neon_vst2lane:
3067  case Intrinsic::arm_neon_vst3lane:
3068  case Intrinsic::arm_neon_vst4lane: {
3069  unsigned MemAlign =
3070  getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
3071  unsigned AlignArg = II->getNumArgOperands() - 1;
3072  ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
3073  if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
3074  II->setArgOperand(AlignArg,
3075  ConstantInt::get(Type::getInt32Ty(II->getContext()),
3076  MemAlign, false));
3077  return II;
3078  }
3079  break;
3080  }
3081 
3082  case Intrinsic::arm_neon_vmulls:
3083  case Intrinsic::arm_neon_vmullu:
3084  case Intrinsic::aarch64_neon_smull:
3085  case Intrinsic::aarch64_neon_umull: {
3086  Value *Arg0 = II->getArgOperand(0);
3087  Value *Arg1 = II->getArgOperand(1);
3088 
3089  // Handle mul by zero first:
3090  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3091  return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3092  }
3093 
3094  // Check for constant LHS & RHS - in this case we just simplify.
3095  bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
3096  II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
3097  VectorType *NewVT = cast<VectorType>(II->getType());
3098  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3099  if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3100  CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
3101  CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
3102 
3103  return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
3104  }
3105 
3106  // Couldn't simplify - canonicalize constant to the RHS.
3107  std::swap(Arg0, Arg1);
3108  }
3109 
3110  // Handle mul by one:
3111  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3112  if (ConstantInt *Splat =
3113  dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3114  if (Splat->isOne())
3115  return CastInst::CreateIntegerCast(Arg0, II->getType(),
3116  /*isSigned=*/!Zext);
3117 
3118  break;
3119  }
3120  case Intrinsic::amdgcn_rcp: {
3121  Value *Src = II->getArgOperand(0);
3122 
3123  // TODO: Move to ConstantFolding/InstSimplify?
3124  if (isa<UndefValue>(Src))
3125  return replaceInstUsesWith(CI, Src);
3126 
3127  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3128  const APFloat &ArgVal = C->getValueAPF();
3129  APFloat Val(ArgVal.getSemantics(), 1.0);
 3130  APFloat::opStatus Status = Val.divide(ArgVal,
 3131  APFloat::rmNearestTiesToEven);
3132  // Only do this if it was exact and therefore not dependent on the
3133  // rounding mode.
3134  if (Status == APFloat::opOK)
3135  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
3136  }
3137 
3138  break;
3139  }
3140  case Intrinsic::amdgcn_rsq: {
3141  Value *Src = II->getArgOperand(0);
3142 
3143  // TODO: Move to ConstantFolding/InstSimplify?
3144  if (isa<UndefValue>(Src))
3145  return replaceInstUsesWith(CI, Src);
3146  break;
3147  }
3148  case Intrinsic::amdgcn_frexp_mant:
3149  case Intrinsic::amdgcn_frexp_exp: {
3150  Value *Src = II->getArgOperand(0);
3151  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3152  int Exp;
 3153  APFloat Significand = frexp(C->getValueAPF(), Exp,
 3154  APFloat::rmNearestTiesToEven);
3155 
3156  if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
3157  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
3158  Significand));
3159  }
3160 
3161  // Match instruction special case behavior.
3162  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
3163  Exp = 0;
3164 
3165  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
3166  }
3167 
3168  if (isa<UndefValue>(Src))
3169  return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
3170 
3171  break;
3172  }
3173  case Intrinsic::amdgcn_class: {
3174  enum {
3175  S_NAN = 1 << 0, // Signaling NaN
3176  Q_NAN = 1 << 1, // Quiet NaN
3177  N_INFINITY = 1 << 2, // Negative infinity
3178  N_NORMAL = 1 << 3, // Negative normal
3179  N_SUBNORMAL = 1 << 4, // Negative subnormal
3180  N_ZERO = 1 << 5, // Negative zero
3181  P_ZERO = 1 << 6, // Positive zero
3182  P_SUBNORMAL = 1 << 7, // Positive subnormal
3183  P_NORMAL = 1 << 8, // Positive normal
3184  P_INFINITY = 1 << 9 // Positive infinity
3185  };
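       // For example, a mask of (N_ZERO | P_ZERO) asks whether the operand is a
       // zero of either sign, and (S_NAN | Q_NAN) asks whether it is any NaN
       // (see the fcmp fold below).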
3186 
 3187  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
 3188  N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
3189 
3190  Value *Src0 = II->getArgOperand(0);
3191  Value *Src1 = II->getArgOperand(1);
3192  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
3193  if (!CMask) {
3194  if (isa<UndefValue>(Src0))
3195  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3196 
3197  if (isa<UndefValue>(Src1))
3198  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3199  break;
3200  }
3201 
3202  uint32_t Mask = CMask->getZExtValue();
3203 
3204  // If all tests are made, it doesn't matter what the value is.
3205  if ((Mask & FullMask) == FullMask)
3206  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
3207 
3208  if ((Mask & FullMask) == 0)
3209  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3210 
3211  if (Mask == (S_NAN | Q_NAN)) {
3212  // Equivalent of isnan. Replace with standard fcmp.
3213  Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0);
3214  FCmp->takeName(II);
3215  return replaceInstUsesWith(*II, FCmp);
3216  }
3217 
3218  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
3219  if (!CVal) {
3220  if (isa<UndefValue>(Src0))
3221  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3222 
3223  // Clamp mask to used bits
3224  if ((Mask & FullMask) != Mask) {
3225  CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(),
3226  { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
3227  );
3228 
3229  NewCall->takeName(II);
3230  return replaceInstUsesWith(*II, NewCall);
3231  }
3232 
3233  break;
3234  }
3235 
3236  const APFloat &Val = CVal->getValueAPF();
3237 
3238  bool Result =
3239  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
3240  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
3241  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
3242  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
3243  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
3244  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
3245  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
3246  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
3247  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
3248  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
3249 
3250  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
3251  }
3252  case Intrinsic::amdgcn_cvt_pkrtz: {
3253  Value *Src0 = II->getArgOperand(0);
3254  Value *Src1 = II->getArgOperand(1);
3255  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3256  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3257  const fltSemantics &HalfSem
3258  = II->getType()->getScalarType()->getFltSemantics();
3259  bool LosesInfo;
3260  APFloat Val0 = C0->getValueAPF();
3261  APFloat Val1 = C1->getValueAPF();
3262  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3263  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3264 
3265  Constant *Folded = ConstantVector::get({
3266  ConstantFP::get(II->getContext(), Val0),
3267  ConstantFP::get(II->getContext(), Val1) });
3268  return replaceInstUsesWith(*II, Folded);
3269  }
3270  }
3271 
3272  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3273  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3274 
3275  break;
3276  }
3277  case Intrinsic::amdgcn_ubfe:
3278  case Intrinsic::amdgcn_sbfe: {
3279  // Decompose simple cases into standard shifts.
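       // For example, with constant offset o and width w where o + w < n
       // (n = integer bit width), the extract is roughly
       //   (x << (n - o - w)) >> (n - w)
       // using an arithmetic right shift for sbfe and a logical one for ubfe.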
3280  Value *Src = II->getArgOperand(0);
3281  if (isa<UndefValue>(Src))
3282  return replaceInstUsesWith(*II, Src);
3283 
3284  unsigned Width;
3285  Type *Ty = II->getType();
3286  unsigned IntSize = Ty->getIntegerBitWidth();
3287 
3288  ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
3289  if (CWidth) {
3290  Width = CWidth->getZExtValue();
3291  if ((Width & (IntSize - 1)) == 0)
3292  return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
3293 
3294  if (Width >= IntSize) {
3295  // Hardware ignores high bits, so remove those.
3296  II->setArgOperand(2, ConstantInt::get(CWidth->getType(),
3297  Width & (IntSize - 1)));
3298  return II;
3299  }
3300  }
3301 
3302  unsigned Offset;
3303  ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
3304  if (COffset) {
3305  Offset = COffset->getZExtValue();
3306  if (Offset >= IntSize) {
3307  II->setArgOperand(1, ConstantInt::get(COffset->getType(),
3308  Offset & (IntSize - 1)));
3309  return II;
3310  }
3311  }
3312 
3313  bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
3314 
3315  // TODO: Also emit sub if only width is constant.
3316  if (!CWidth && COffset && Offset == 0) {
3317  Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
3318  Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
3319  ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
3320 
3321  Value *Shl = Builder.CreateShl(Src, ShiftVal);
3322  Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
3323  : Builder.CreateLShr(Shl, ShiftVal);
3324  RightShift->takeName(II);
3325  return replaceInstUsesWith(*II, RightShift);
3326  }
3327 
3328  if (!CWidth || !COffset)
3329  break;
3330 
3331  // TODO: This allows folding to undef when the hardware has specific
3332  // behavior?
3333  if (Offset + Width < IntSize) {
3334  Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width);
3335  Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width)
3336  : Builder.CreateLShr(Shl, IntSize - Width);
3337  RightShift->takeName(II);
3338  return replaceInstUsesWith(*II, RightShift);
3339  }
3340 
3341  Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset)
3342  : Builder.CreateLShr(Src, Offset);
3343 
3344  RightShift->takeName(II);
3345  return replaceInstUsesWith(*II, RightShift);
3346  }
3347  case Intrinsic::amdgcn_exp:
3348  case Intrinsic::amdgcn_exp_compr: {
3349  ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
3350  if (!En) // Illegal.
3351  break;
3352 
3353  unsigned EnBits = En->getZExtValue();
3354  if (EnBits == 0xf)
3355  break; // All inputs enabled.
3356 
3357  bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
3358  bool Changed = false;
3359  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
3360  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
3361  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
3362  Value *Src = II->getArgOperand(I + 2);
3363  if (!isa<UndefValue>(Src)) {
3364  II->setArgOperand(I + 2, UndefValue::get(Src->getType()));
3365  Changed = true;
3366  }
3367  }
3368  }
3369 
3370  if (Changed)
3371  return II;
3372 
3373  break;
3374 
3375  }
3376  case Intrinsic::amdgcn_fmed3: {
3377  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
3378  // for the shader.
3379 
3380  Value *Src0 = II->getArgOperand(0);
3381  Value *Src1 = II->getArgOperand(1);
3382  Value *Src2 = II->getArgOperand(2);
3383 
3384  bool Swap = false;
3385  // Canonicalize constants to RHS operands.
3386  //
3387  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
3388  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3389  std::swap(Src0, Src1);
3390  Swap = true;
3391  }
3392 
3393  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
3394  std::swap(Src1, Src2);
3395  Swap = true;
3396  }
3397 
3398  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3399  std::swap(Src0, Src1);
3400  Swap = true;
3401  }
3402 
3403  if (Swap) {
3404  II->setArgOperand(0, Src0);
3405  II->setArgOperand(1, Src1);
3406  II->setArgOperand(2, Src2);
3407  return II;
3408  }
3409 
3410  if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
3411  CallInst *NewCall = Builder.CreateMinNum(Src0, Src1);
3412  NewCall->copyFastMathFlags(II);
3413  NewCall->takeName(II);
3414  return replaceInstUsesWith(*II, NewCall);
3415  }
3416 
3417  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3418  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3419  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
3420  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
3421  C2->getValueAPF());
3422  return replaceInstUsesWith(*II,
3423  ConstantFP::get(Builder.getContext(), Result));
3424  }
3425  }
3426  }
3427 
3428  break;
3429  }
3430  case Intrinsic::amdgcn_icmp:
3431  case Intrinsic::amdgcn_fcmp: {
3432  const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
3433  if (!CC)
3434  break;
3435 
3436  // Guard against invalid arguments.
3437  int64_t CCVal = CC->getZExtValue();
3438  bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
3439  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
3440  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
3441  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
3442  CCVal > CmpInst::LAST_FCMP_PREDICATE)))
3443  break;
3444 
3445  Value *Src0 = II->getArgOperand(0);
3446  Value *Src1 = II->getArgOperand(1);
3447 
3448  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
3449  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
3450  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
3451  if (CCmp->isNullValue()) {
3452  return replaceInstUsesWith(
3453  *II, ConstantExpr::getSExt(CCmp, II->getType()));
3454  }
3455 
3456  // The result of V_ICMP/V_FCMP assembly instructions (which this
3457  // intrinsic exposes) is one bit per thread, masked with the EXEC
3458  // register (which contains the bitmask of live threads). So a
3459  // comparison that always returns true is the same as a read of the
3460  // EXEC register.
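       // E.g. for the common i64 form this produces, roughly,
       //   %r = call i64 @llvm.read_register.i64(metadata !{!"exec"})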
 3461  Value *NewF = Intrinsic::getDeclaration(
 3462  II->getModule(), Intrinsic::read_register, II->getType());
3463  Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
3464  MDNode *MD = MDNode::get(II->getContext(), MDArgs);
3465  Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
 3466  CallInst *NewCall = Builder.CreateCall(NewF, Args);
 3467  NewCall->addAttribute(AttributeList::FunctionIndex,
 3468  Attribute::Convergent);
3469  NewCall->takeName(II);
3470  return replaceInstUsesWith(*II, NewCall);
3471  }
3472 
3473  // Canonicalize constants to RHS.
3474  CmpInst::Predicate SwapPred
3475  = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
3476  II->setArgOperand(0, Src1);
3477  II->setArgOperand(1, Src0);
3478  II->setArgOperand(2, ConstantInt::get(CC->getType(),
3479  static_cast<int>(SwapPred)));
3480  return II;
3481  }
3482 
3483  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
3484  break;
3485 
3486  // Canonicalize compare eq with true value to compare != 0
3487  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
3488  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
3489  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
3490  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
3491  Value *ExtSrc;
3492  if (CCVal == CmpInst::ICMP_EQ &&
3493  ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
3494  (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
3495  ExtSrc->getType()->isIntegerTy(1)) {
3496  II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType()));
3497  II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
3498  return II;
3499  }
3500 
3501  CmpInst::Predicate SrcPred;
3502  Value *SrcLHS;
3503  Value *SrcRHS;
3504 
3505  // Fold compare eq/ne with 0 from a compare result as the predicate to the
3506  // intrinsic. The typical use is a wave vote function in the library, which
3507  // will be fed from a user code condition compared with 0. Fold in the
3508  // redundant compare.
3509 
3510  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
3511  // -> llvm.amdgcn.[if]cmp(a, b, pred)
3512  //
3513  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
3514  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
3515  if (match(Src1, m_Zero()) &&
3516  match(Src0,
3517  m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
3518  if (CCVal == CmpInst::ICMP_EQ)
3519  SrcPred = CmpInst::getInversePredicate(SrcPred);
3520 
3521  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
3522  Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
3523 
3524  Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
3525  SrcLHS->getType());
3526  Value *Args[] = { SrcLHS, SrcRHS,
3527  ConstantInt::get(CC->getType(), SrcPred) };
3528  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3529  NewCall->takeName(II);
3530  return replaceInstUsesWith(*II, NewCall);
3531  }
3532 
3533  break;
3534  }
3535  case Intrinsic::stackrestore: {
3536  // If the save is right next to the restore, remove the restore. This can
3537  // happen when variable allocas are DCE'd.
3538  if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3539  if (SS->getIntrinsicID() == Intrinsic::stacksave) {
3540  if (&*++SS->getIterator() == II)
3541  return eraseInstFromFunction(CI);
3542  }
3543  }
3544 
3545  // Scan down this block to see if there is another stack restore in the
3546  // same block without an intervening call/alloca.
3547  BasicBlock::iterator BI(II);
3548  TerminatorInst *TI = II->getParent()->getTerminator();
3549  bool CannotRemove = false;
3550  for (++BI; &*BI != TI; ++BI) {
3551  if (isa<AllocaInst>(BI)) {
3552  CannotRemove = true;
3553  break;
3554  }
3555  if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
3556  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
3557  // If there is a stackrestore below this one, remove this one.
3558  if (II->getIntrinsicID() == Intrinsic::stackrestore)
3559  return eraseInstFromFunction(CI);
3560 
3561  // Bail if we cross over an intrinsic with side effects, such as
3562  // llvm.stacksave, llvm.read_register, or llvm.setjmp.
3563  if (II->mayHaveSideEffects()) {
3564  CannotRemove = true;
3565  break;
3566  }
3567  } else {
3568  // If we found a non-intrinsic call, we can't remove the stack
3569  // restore.
3570  CannotRemove = true;
3571  break;
3572  }
3573  }
3574  }
3575 
3576  // If the stack restore is in a return, resume, or unwind block and if there
3577  // are no allocas or calls between the restore and the return, nuke the
3578  // restore.
3579  if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3580  return eraseInstFromFunction(CI);
3581  break;
3582  }
3583  case Intrinsic::lifetime_start:
 3584  // ASan needs to poison memory to detect invalid accesses, which are possible
 3585  // even for an empty lifetime range.
3586  if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
3587  break;
3588 
3589  if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
3590  Intrinsic::lifetime_end, *this))
3591  return nullptr;
3592  break;
3593  case Intrinsic::assume: {
3594  Value *IIOperand = II->getArgOperand(0);
3595  // Remove an assume if it is immediately followed by an identical assume.
3596  if (match(II->getNextNode(),
3597  m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
3598  return eraseInstFromFunction(CI);
3599 
3600  // Canonicalize assume(a && b) -> assume(a); assume(b);
3601  // Note: New assumption intrinsics created here are registered by
3602  // the InstCombineIRInserter object.
3603  Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
3604  if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
3605  Builder.CreateCall(AssumeIntrinsic, A, II->getName());
3606  Builder.CreateCall(AssumeIntrinsic, B, II->getName());
3607  return eraseInstFromFunction(*II);
3608  }
3609  // assume(!(a || b)) -> assume(!a); assume(!b);
3610  if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
3611  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
3612  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
3613  return eraseInstFromFunction(*II);
3614  }
3615 
3616  // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3617  // (if assume is valid at the load)
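       // E.g. given %p = load i8*, i8** %q followed by
       //   call void @llvm.assume(i1 %c) where %c = icmp ne i8* %p, null
       // the load is tagged with !nonnull metadata and the assume is erased.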
3618  CmpInst::Predicate Pred;
3619  Instruction *LHS;
3620  if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
3621  Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
3622  LHS->getType()->isPointerTy() &&
3623  isValidAssumeForContext(II, LHS, &DT)) {
 3624  MDNode *MD = MDNode::get(II->getContext(), None);
 3625  LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3626  return eraseInstFromFunction(*II);
3627 
3628  // TODO: apply nonnull return attributes to calls and invokes
3629  // TODO: apply range metadata for range check patterns?
3630  }
3631 
3632  // If there is a dominating assume with the same condition as this one,
3633  // then this one is redundant, and should be removed.
3634  KnownBits Known(1);
3635  computeKnownBits(IIOperand, Known, 0, II);
3636  if (Known.isAllOnes())
3637  return eraseInstFromFunction(*II);
3638 
3639  // Update the cache of affected values for this assumption (we might be
3640  // here because we just simplified the condition).
3641  AC.updateAffectedValues(II);
3642  break;
3643  }
3644  case Intrinsic::experimental_gc_relocate: {
3645  // Translate facts known about a pointer before relocating into
3646  // facts about the relocate value, while being careful to
3647  // preserve relocation semantics.
3648  Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
3649 
 3650  // Remove the relocation if unused; note that this check is required
3651  // to prevent the cases below from looping forever.
3652  if (II->use_empty())
3653  return eraseInstFromFunction(*II);
3654 
3655  // Undef is undef, even after relocation.
3656  // TODO: provide a hook for this in GCStrategy. This is clearly legal for
3657  // most practical collectors, but there was discussion in the review thread
3658  // about whether it was legal for all possible collectors.
3659  if (isa<UndefValue>(DerivedPtr))
3660  // Use undef of gc_relocate's type to replace it.
3661  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3662 
3663  if (auto *PT = dyn_cast<PointerType>(II->getType())) {
3664  // The relocation of null will be null for most any collector.
3665  // TODO: provide a hook for this in GCStrategy. There might be some
3666  // weird collector this property does not hold for.
3667  if (isa<ConstantPointerNull>(DerivedPtr))
3668  // Use null-pointer of gc_relocate's type to replace it.
3669  return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
3670 
3671  // isKnownNonNull -> nonnull attribute
3672  if (isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT))
3673  II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
3674  }
3675 
3676  // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3677  // Canonicalize on the type from the uses to the defs
3678 
3679  // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3680  break;
3681  }
3682 
3683  case Intrinsic::experimental_guard: {
3684  // Is this guard followed by another guard?
3685  Instruction *NextInst = II->getNextNode();
3686  Value *NextCond = nullptr;
3687  if (match(NextInst,
3688  m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3689  Value *CurrCond = II->getArgOperand(0);
3690 
 3691  // Remove a guard that is immediately preceded by an identical guard.
3692  if (CurrCond == NextCond)
3693  return eraseInstFromFunction(*NextInst);
3694 
3695  // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3696  II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
3697  return eraseInstFromFunction(*NextInst);
3698  }
3699  break;
3700  }
3701  }
3702  return visitCallSite(II);
3703 }
3704 
3705 // Fence instruction simplification
 3706 Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
 3707  // Remove identical consecutive fences.
3708  if (auto *NFI = dyn_cast<FenceInst>(FI.getNextNode()))
3709  if (FI.isIdenticalTo(NFI))
3710  return eraseInstFromFunction(FI);
3711  return nullptr;
3712 }
3713 
3714 // InvokeInst simplification
3715 //
 3716 Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
 3717  return visitCallSite(&II);
3718 }
3719 
3720 /// If this cast does not affect the value passed through the varargs area, we
3721 /// can eliminate the use of the cast.
 3722 static bool isSafeToEliminateVarargsCast(const CallSite CS,
 3723  const DataLayout &DL,
3724  const CastInst *const CI,
3725  const int ix) {
3726  if (!CI->isLosslessCast())
3727  return false;
3728 
3729  // If this is a GC intrinsic, avoid munging types. We need types for
3730  // statepoint reconstruction in SelectionDAG.
3731  // TODO: This is probably something which should be expanded to all
3732  // intrinsics since the entire point of intrinsics is that
3733  // they are understandable by the optimizer.
3734  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
3735  return false;
3736 
3737  // The size of ByVal or InAlloca arguments is derived from the type, so we
3738  // can't change to a type with a different size. If the size were
3739  // passed explicitly we could avoid this check.
3740  if (!CS.isByValOrInAllocaArgument(ix))
3741  return true;
3742 
3743  Type* SrcTy =
3744  cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
3745  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
3746  if (!SrcTy->isSized() || !DstTy->isSized())
3747  return false;
3748  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
3749  return false;
3750  return true;
3751 }
3752 
3753 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
3754  if (!CI->getCalledFunction()) return nullptr;
3755 
3756  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
3757  replaceInstUsesWith(*From, With);
3758  };
3759  LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW);
3760  if (Value *With = Simplifier.optimizeCall(CI)) {
3761  ++NumSimplified;
3762  return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
3763  }
3764 
3765  return nullptr;
3766 }
3767 
 3768 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
 3769  // Strip off at most one level of pointer casts, looking for an alloca. This
3770  // is good enough in practice and simpler than handling any number of casts.
3771  Value *Underlying = TrampMem->stripPointerCasts();
3772  if (Underlying != TrampMem &&
3773  (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
3774  return nullptr;
3775  if (!isa<AllocaInst>(Underlying))
3776  return nullptr;
3777 
3778  IntrinsicInst *InitTrampoline = nullptr;
3779  for (User *U : TrampMem->users()) {
 3780  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
 3781  if (!II)
3782  return nullptr;
3783  if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
3784  if (InitTrampoline)
3785  // More than one init_trampoline writes to this value. Give up.
3786  return nullptr;
3787  InitTrampoline = II;
3788  continue;
3789  }
3790  if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
3791  // Allow any number of calls to adjust.trampoline.
3792  continue;
3793  return nullptr;
3794  }
3795 
3796  // No call to init.trampoline found.
3797  if (!InitTrampoline)
3798  return nullptr;
3799 
3800  // Check that the alloca is being used in the expected way.
3801  if (InitTrampoline->getOperand(0) != TrampMem)
3802  return nullptr;
3803 
3804  return InitTrampoline;
3805 }
3806 
 3807 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
 3808  Value *TrampMem) {
3809  // Visit all the previous instructions in the basic block, and try to find a
3810  // init.trampoline which has a direct path to the adjust.trampoline.
3811  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
3812  E = AdjustTramp->getParent()->begin();
3813  I != E;) {
3814  Instruction *Inst = &*--I;
3815  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
3816  if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
3817  II->getOperand(0) == TrampMem)
3818  return II;
3819  if (Inst->mayWriteToMemory())
3820  return nullptr;
3821  }
3822  return nullptr;
3823 }
3824 
3825 // Given a call to llvm.adjust.trampoline, find and return the corresponding
3826 // call to llvm.init.trampoline if the call to the trampoline can be optimized
3827 // to a direct call to a function. Otherwise return NULL.
3828 //
 3829 static IntrinsicInst *findInitTrampoline(Value *Callee) {
 3830  Callee = Callee->stripPointerCasts();
3831  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
3832  if (!AdjustTramp ||
3833  AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
3834  return nullptr;
3835 
3836  Value *TrampMem = AdjustTramp->getOperand(0);
3837 
 3838  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
 3839  return IT;
3840  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
3841  return IT;
3842  return nullptr;
3843 }
3844 
3845 /// Improvements for call and invoke instructions.
3846 Instruction *InstCombiner::visitCallSite(CallSite CS) {
3847  if (isAllocLikeFn(CS.getInstruction(), &TLI))
3848  return visitAllocSite(*CS.getInstruction());
3849 
3850  bool Changed = false;
3851 
3852  // Mark any parameters that are known to be non-null with the nonnull
3853  // attribute. This is helpful for inlining calls to functions with null
3854  // checks on their arguments.
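  // For example, when a pointer argument is proven non-null at this call site,
  // a callee that begins with a null check on that parameter can have the
  // check folded away once the call is inlined.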
3855  SmallVector<unsigned, 4> ArgNos;
3856  unsigned ArgNo = 0;
3857 
3858  for (Value *V : CS.args()) {
3859  if (V->getType()->isPointerTy() &&
3860  !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
3861  isKnownNonZero(V, DL, 0, &AC, CS.getInstruction(), &DT))
3862  ArgNos.push_back(ArgNo);
3863  ArgNo++;
3864  }
3865 
3866  assert(ArgNo == CS.arg_size() && "sanity check");
3867 
3868  if (!ArgNos.empty()) {
 3869  AttributeList AS = CS.getAttributes();
 3870  LLVMContext &Ctx = CS.getInstruction()->getContext();
3871  AS = AS.addParamAttribute(Ctx, ArgNos,
3872  Attribute::get(Ctx, Attribute::NonNull));
3873  CS.setAttributes(AS);
3874  Changed = true;
3875  }
3876 
3877  // If the callee is a pointer to a function, attempt to move any casts to the
3878  // arguments of the call/invoke.
3879  Value *Callee = CS.getCalledValue();
3880  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
3881  return nullptr;
3882 
3883  if (Function *CalleeF = dyn_cast<Function>(Callee)) {
3884  // Remove the convergent attr on calls when the callee is not convergent.
3885  if (CS.isConvergent() && !CalleeF->isConvergent() &&
3886  !CalleeF->isIntrinsic()) {
3887  DEBUG(dbgs() << "Removing convergent attr from instr "
3888  << CS.getInstruction() << "\n");
3889  CS.setNotConvergent();
3890  return CS.getInstruction();
3891  }
3892 
3893  // If the call and callee calling conventions don't match, this call must
3894  // be unreachable, as the call is undefined.
3895  if (CalleeF->getCallingConv() != CS.getCallingConv() &&
3896  // Only do this for calls to a function with a body. A prototype may
3897  // not actually end up matching the implementation's calling conv for a
3898  // variety of reasons (e.g. it may be written in assembly).
3899  !CalleeF->isDeclaration()) {
3900  Instruction *OldCall = CS.getInstruction();
3901  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
 3902  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
 3903  OldCall);
 3904  // If OldCall does not return void, then replaceAllUsesWith undef.
 3905  // This allows ValueHandlers and custom metadata to adjust themselves.
3906  if (!OldCall->getType()->isVoidTy())
3907  replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
3908  if (isa<CallInst>(OldCall))
3909  return eraseInstFromFunction(*OldCall);
3910 
3911  // We cannot remove an invoke, because it would change the CFG, just
3912  // change the callee to a null pointer.
3913  cast<InvokeInst>(OldCall)->setCalledFunction(
3914  Constant::getNullValue(CalleeF->getType()));
3915  return nullptr;
3916  }
3917  }
3918 
3919  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
 3920  // If CS does not return void, then replaceAllUsesWith undef.
 3921  // This allows ValueHandlers and custom metadata to adjust themselves.
3922  if (!CS.getInstruction()->getType()->isVoidTy())
 3923  replaceInstUsesWith(*CS.getInstruction(),
 3924  UndefValue::get(CS.getInstruction()->getType()));
3925 
3926  if (isa<InvokeInst>(CS.getInstruction())) {
3927  // Can't remove an invoke because we cannot change the CFG.
3928  return nullptr;
3929  }
3930 
3931  // This instruction is not reachable, just remove it. We insert a store to
3932  // undef so that we know that this code is not reachable, despite the fact
3933  // that we can't modify the CFG here.
 3934  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
 3935  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
3936  CS.getInstruction());
3937 
3938  return eraseInstFromFunction(*CS.getInstruction());
3939  }
3940 
3941  if (IntrinsicInst *II = findInitTrampoline(Callee))
3942  return transformCallThroughTrampoline(CS, II);
3943 
3944  PointerType *PTy = cast<PointerType>(Callee->getType());
3945  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
3946  if (FTy->isVarArg()) {
3947  int ix = FTy->getNumParams();
3948  // See if we can optimize any arguments passed through the varargs area of
3949  // the call.
3950  for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
3951  E = CS.arg_end(); I != E; ++I, ++ix) {
3952  CastInst *CI = dyn_cast<CastInst>(*I);
3953  if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
3954  *I = CI->getOperand(0);
3955  Changed = true;
3956  }
3957  }
3958  }
3959 
3960  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
3961  // Inline asm calls cannot throw - mark them 'nounwind'.
3962  CS.setDoesNotThrow();
3963  Changed = true;
3964  }
3965 
3966  // Try to optimize the call if possible, we require DataLayout for most of
3967  // this. None of these calls are seen as possibly dead so go ahead and
3968  // delete the instruction now.
3969  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
3970  Instruction *I = tryOptimizeCall(CI);
 3971  // If we changed something, return the result. Otherwise fall
 3972  // through to the final check below.
3973  if (I) return eraseInstFromFunction(*I);
3974  }
3975 
3976  return Changed ? CS.getInstruction() : nullptr;
3977 }
3978 
3979 /// If the callee is a constexpr cast of a function, attempt to move the cast to
3980 /// the arguments of the call/invoke.
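 /// For example, a call such as
 ///   call i32 bitcast (i32 (i8*)* @f to i32 (i32*)*)(i32* %p)
 /// can usually be turned into a direct call to @f with %p cast to i8*,
 /// provided the argument and return types are bit- or no-op-pointer castable.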
3981 bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 3982  Function *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
 3983  if (!Callee)
3984  return false;
3985 
3986  // The prototype of a thunk is a lie. Don't directly call such a function.
3987  if (Callee->hasFnAttribute("thunk"))
3988  return false;
3989 
3990  Instruction *Caller = CS.getInstruction();
3991  const AttributeList &CallerPAL = CS.getAttributes();
3992 
3993  // Okay, this is a cast from a function to a different type. Unless doing so
3994  // would cause a type conversion of one of our arguments, change this call to
3995  // be a direct call with arguments casted to the appropriate types.
3996  //
3997  FunctionType *FT = Callee->getFunctionType();
3998  Type *OldRetTy = Caller->getType();
3999  Type *NewRetTy = FT->getReturnType();
4000 
4001  // Check to see if we are changing the return type...
4002  if (OldRetTy != NewRetTy) {
4003 
4004  if (NewRetTy->isStructTy())
4005  return false; // TODO: Handle multiple return values.
4006 
4007  if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4008  if (Callee->isDeclaration())
4009  return false; // Cannot transform this return value.
4010 
4011  if (!Caller->use_empty() &&
4012  // void -> non-void is handled specially
4013  !NewRetTy->isVoidTy())
4014  return false; // Cannot transform this return value.
4015  }
4016 
4017  if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4018  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4019  if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
4020  return false; // Attribute not compatible with transformed value.
4021  }
4022 
4023  // If the callsite is an invoke instruction, and the return value is used by
4024  // a PHI node in a successor, we cannot change the return type of the call
4025  // because there is no place to put the cast instruction (without breaking
4026  // the critical edge). Bail out in this case.
4027  if (!Caller->use_empty())
4028  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
4029  for (User *U : II->users())
4030  if (PHINode *PN = dyn_cast<PHINode>(U))
4031  if (PN->getParent() == II->getNormalDest() ||
4032  PN->getParent() == II->getUnwindDest())
4033  return false;
4034  }
4035 
4036  unsigned NumActualArgs = CS.arg_size();
4037  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4038 
4039  // Prevent us turning:
4040  // declare void @takes_i32_inalloca(i32* inalloca)
4041  // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
4042  //
4043  // into:
4044  // call void @takes_i32_inalloca(i32* null)
4045  //
4046  // Similarly, avoid folding away bitcasts of byval calls.
4047  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
4048  Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
4049  return false;
4050 
 4051  CallSite::arg_iterator AI = CS.arg_begin();
 4052  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4053  Type *ParamTy = FT->getParamType(i);
4054  Type *ActTy = (*AI)->getType();
4055 
4056  if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
4057  return false; // Cannot transform this parameter value.
4058 
4059  if (AttrBuilder(CallerPAL.getParamAttributes(i))
4060  .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
4061  return false; // Attribute not compatible with transformed value.
4062 
4063  if (CS.isInAllocaArgument(i))
4064  return false; // Cannot transform to and from inalloca.
4065 
4066  // If the parameter is passed as a byval argument, then we have to have a
4067  // sized type and the sized type has to have the same size as the old type.
4068  if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
4069  PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
4070  if (!ParamPTy || !ParamPTy->getElementType()->isSized())
4071  return false;
4072 
4073  Type *CurElTy = ActTy->getPointerElementType();
4074  if (DL.getTypeAllocSize(CurElTy) !=
4075  DL.getTypeAllocSize(ParamPTy->getElementType()))
4076  return false;
4077  }
4078  }
4079 
4080  if (Callee->isDeclaration()) {
4081  // Do not delete arguments unless we have a function body.
4082  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
4083  return false;
4084 
4085  // If the callee is just a declaration, don't change the varargsness of the
4086  // call. We don't want to introduce a varargs call where one doesn't
4087  // already exist.
4088  PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
4089  if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
4090  return false;
4091 
4092  // If both the callee and the cast type are varargs, we still have to make
4093  // sure the number of fixed parameters are the same or we have the same
4094  // ABI issues as if we introduce a varargs call.
4095  if (FT->isVarArg() &&
4096  cast<FunctionType>(APTy->getElementType())->isVarArg() &&
4097  FT->getNumParams() !=
4098  cast<FunctionType>(APTy->getElementType())->getNumParams())
4099  return false;
4100  }
4101 
4102  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4103  !CallerPAL.isEmpty()) {
4104  // In this case we have more arguments than the new function type, but we
4105  // won't be dropping them. Check that these extra arguments have attributes
4106  // that are compatible with being a vararg call argument.
4107  unsigned SRetIdx;
4108  if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
4109  SRetIdx > FT->getNumParams())
4110  return false;
4111  }
4112 
4113  // Okay, we decided that this is a safe thing to do: go ahead and start
4114  // inserting cast instructions as necessary.
 4115  SmallVector<Value *, 8> Args;
 4116  SmallVector<AttributeSet, 8> ArgAttrs;
 4117  Args.reserve(NumActualArgs);
4118  ArgAttrs.reserve(NumActualArgs);
4119 
4120  // Get any return attributes.
4121  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4122 
4123  // If the return value is not being used, the type may not be compatible
4124  // with the existing attributes. Wipe out any problematic attributes.
4125  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
4126 
4127  AI = CS.arg_begin();
4128  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4129  Type *ParamTy = FT->getParamType(i);
4130 
4131  Value *NewArg = *AI;
4132  if ((*AI)->getType() != ParamTy)
4133  NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
4134  Args.push_back(NewArg);
4135 
4136  // Add any parameter attributes.
4137  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4138  }
4139 
4140  // If the function takes more arguments than the call was taking, add them
4141  // now.
4142  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
 4143  Args.push_back(Constant::getNullValue(FT->getParamType(i)));
 4144  ArgAttrs.push_back(AttributeSet());
4145  }
4146 
4147  // If we are removing arguments to the function, emit an obnoxious warning.
4148  if (FT->getNumParams() < NumActualArgs) {
4149  // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4150  if (FT->isVarArg()) {
4151  // Add all of the arguments in their promoted form to the arg list.
4152  for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4153  Type *PTy = getPromotedType((*AI)->getType());
4154  Value *NewArg = *AI;
4155  if (PTy != (*AI)->getType()) {
4156  // Must promote to pass through va_arg area!
4157  Instruction::CastOps opcode =
4158  CastInst::getCastOpcode(*AI, false, PTy, false);
4159  NewArg = Builder.CreateCast(opcode, *AI, PTy);
4160  }
4161  Args.push_back(NewArg);
4162 
4163  // Add any parameter attributes.
4164  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4165  }
4166  }
4167  }
4168 
4169  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
4170 
4171  if (NewRetTy->isVoidTy())
4172  Caller->setName(""); // Void type should not have a name.
4173 
4174  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
4175  "missing argument attributes");
4176  LLVMContext &Ctx = Callee->getContext();
4177  AttributeList NewCallerPAL = AttributeList::get(
4178  Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
4179 
 4180  SmallVector<OperandBundleDef, 1> OpBundles;
 4181  CS.getOperandBundlesAsDefs(OpBundles);
4182 
4183  CallSite NewCS;
4184  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4185  NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
4186  II->getUnwindDest(), Args, OpBundles);
4187  } else {
4188  NewCS = Builder.CreateCall(Callee, Args, OpBundles);
4189  cast<CallInst>(NewCS.getInstruction())
4190  ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
4191  }
4192  NewCS->takeName(Caller);
4193  NewCS.setCallingConv(CS.getCallingConv());
4194  NewCS.setAttributes(NewCallerPAL);
4195 
4196  // Preserve the weight metadata for the new call instruction. The metadata
4197  // is used by SamplePGO to check callsite's hotness.
4198  uint64_t W;
4199  if (Caller->extractProfTotalWeight(W))
4200  NewCS->setProfWeight(W);
4201 
4202  // Insert a cast of the return type as necessary.
4203  Instruction *NC = NewCS.getInstruction();
4204  Value *NV = NC;
4205  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4206  if (!NV->getType()->isVoidTy()) {
4207  NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
4208  NC->setDebugLoc(Caller->getDebugLoc());
4209 
4210  // If this is an invoke instruction, we should insert it after the first
 4211  // non-phi instruction in the normal successor block.
4212  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4213  BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
4214  InsertNewInstBefore(NC, *I);
4215  } else {
4216  // Otherwise, it's a call, just insert cast right after the call.
4217  InsertNewInstBefore(NC, *Caller);
4218  }
4219  Worklist.AddUsersToWorkList(*Caller);
4220  } else {
4221  NV = UndefValue::get(Caller->getType());
4222  }
4223  }
4224 
4225  if (!Caller->use_empty())
4226  replaceInstUsesWith(*Caller, NV);
4227  else if (Caller->hasValueHandle()) {
4228  if (OldRetTy == NV->getType())
4229  ValueHandleBase::ValueIsRAUWd(Caller, NV);
4230  else
4231  // We cannot call ValueIsRAUWd with a different type, and the
4232  // actual tracked value will disappear.
 4233  ValueHandleBase::ValueIsDeleted(Caller);
 4234  }
4235 
4236  eraseInstFromFunction(*Caller);
4237  return true;
4238 }
4239 
4240 /// Turn a call to a function created by init_trampoline / adjust_trampoline
4241 /// intrinsic pair into a direct call to the underlying function.
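 /// The static chain recorded by llvm.init.trampoline is passed explicitly as
 /// the callee's 'nest' argument, so the call no longer needs to go through
 /// the trampoline memory at all.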
4242 Instruction *
4243 InstCombiner::transformCallThroughTrampoline(CallSite CS,
4244  IntrinsicInst *Tramp) {
4245  Value *Callee = CS.getCalledValue();
4246  PointerType *PTy = cast<PointerType>(Callee->getType());
 4247  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
 4248  AttributeList Attrs = CS.getAttributes();
4249 
4250  // If the call already has the 'nest' attribute somewhere then give up -
4251  // otherwise 'nest' would occur twice after splicing in the chain.
4252  if (Attrs.hasAttrSomewhere(Attribute::Nest))
4253  return nullptr;
4254 
4255  assert(Tramp &&
4256  "transformCallThroughTrampoline called with incorrect CallSite.");
4257 
4258  Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
4259  FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
4260 
4261  AttributeList NestAttrs = NestF->getAttributes();
4262  if (!NestAttrs.isEmpty()) {
4263  unsigned NestArgNo = 0;
4264  Type *NestTy = nullptr;
4265  AttributeSet NestAttr;
4266 
4267  // Look for a parameter marked with the 'nest' attribute.
4268  for (FunctionType::param_iterator I = NestFTy->param_begin(),
4269  E = NestFTy->param_end();
4270  I != E; ++NestArgNo, ++I) {
4271  AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
4272  if (AS.hasAttribute(Attribute::Nest)) {
4273  // Record the parameter type and any other attributes.
4274  NestTy = *I;
4275  NestAttr = AS;
4276  break;
4277  }
4278  }
4279 
4280  if (NestTy) {
4281  Instruction *Caller = CS.getInstruction();
4282  std::vector<Value*> NewArgs;
4283  std::vector<AttributeSet> NewArgAttrs;
4284  NewArgs.reserve(CS.arg_size() + 1);
4285  NewArgAttrs.reserve(CS.arg_size());
4286 
4287  // Insert the nest argument into the call argument list, which may
4288  // mean appending it. Likewise for attributes.
4289 
4290  {
4291  unsigned ArgNo = 0;
4292  CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
4293  do {
4294  if (ArgNo == NestArgNo) {
4295  // Add the chain argument and attributes.
4296  Value *NestVal = Tramp->getArgOperand(2);
4297  if (NestVal->getType() != NestTy)
4298  NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
4299  NewArgs.push_back(NestVal);
4300  NewArgAttrs.push_back(NestAttr);
4301  }
4302 
4303  if (I == E)
4304  break;
4305 
4306  // Add the original argument and attributes.
4307  NewArgs.push_back(*I);
4308  NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
4309 
4310  ++ArgNo;
4311  ++I;
4312  } while (true);
4313  }
4314 
4315  // The trampoline may have been bitcast to a bogus type (FTy).
4316  // Handle this by synthesizing a new function type, equal to FTy
4317  // with the chain parameter inserted.
4318 
4319  std::vector<Type*> NewTypes;
4320  NewTypes.reserve(FTy->getNumParams()+1);
4321 
4322  // Insert the chain's type into the list of parameter types, which may
4323  // mean appending it.
4324  {
4325  unsigned ArgNo = 0;
4326  FunctionType::param_iterator I = FTy->param_begin(),
4327  E = FTy->param_end();
4328 
4329  do {
4330  if (ArgNo == NestArgNo)
4331  // Add the chain's type.
4332  NewTypes.push_back(NestTy);
4333 
4334  if (I == E)
4335  break;
4336 
4337  // Add the original type.
4338  NewTypes.push_back(*I);
4339 
4340  ++ArgNo;
4341  ++I;
4342  } while (true);
4343  }
4344 
4345  // Replace the trampoline call with a direct call. Let the generic
4346  // code sort out any function type mismatches.
4347  FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
4348  FTy->isVarArg());
4349  Constant *NewCallee =
4350  NestF->getType() == PointerType::getUnqual(NewFTy) ?
4351  NestF : ConstantExpr::getBitCast(NestF,
4352  PointerType::getUnqual(NewFTy));
4353  AttributeList NewPAL =
4354  AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
4355  Attrs.getRetAttributes(), NewArgAttrs);
4356 
 4356 
 4357  SmallVector<OperandBundleDef, 1> OpBundles;
 4358  CS.getOperandBundlesAsDefs(OpBundles);
4359 
4360  Instruction *NewCaller;
4361  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4362  NewCaller = InvokeInst::Create(NewCallee,
4363  II->getNormalDest(), II->getUnwindDest(),
4364  NewArgs, OpBundles);
4365  cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
4366  cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
4367  } else {
4368  NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
4369  cast<CallInst>(NewCaller)->setTailCallKind(
4370  cast<CallInst>(Caller)->getTailCallKind());
4371  cast<CallInst>(NewCaller)->setCallingConv(
4372  cast<CallInst>(Caller)->getCallingConv());
4373  cast<CallInst>(NewCaller)->setAttributes(NewPAL);
4374  }
4375 
4376  return NewCaller;
4377  }
4378  }
4379 
4380  // Replace the trampoline call with a direct call. Since there is no 'nest'
4381  // parameter, there is no need to adjust the argument list. Let the generic
4382  // code sort out any function type mismatches.
4383  Constant *NewCallee =
4384  NestF->getType() == PTy ? NestF :
4385  ConstantExpr::getBitCast(NestF, PTy);
4386  CS.setCalledFunction(NewCallee);
4387  return CS.getInstruction();
4388 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isFPPredicate() const
Definition: InstrTypes.h:951
const NoneType None
Definition: None.h:24
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double, and whose elements are just simple data values (i.e.
Definition: Constants.h:735
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:574
uint64_t CallInst * C
User::op_iterator arg_iterator
The type of iterator to use when looping over actual arguments at this call site. ...
Definition: CallSite.h:213
LibCallSimplifier - This class implements a collection of optimizations that replace well formed call...
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:172
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMin(const Opnd0 &Op0, const Opnd1 &Op1)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction, which must be an operator which supports these flags.
void setDoesNotThrow()
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
static void ValueIsDeleted(Value *V)
Definition: Value.cpp:824
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1634
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool isZero() const
Definition: APFloat.h:1128
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:173
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:80
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1542
unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1034
static Value * simplifyX86immShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions that feed it, giving the original input.
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:555
DiagnosticInfoOptimizationBase::Argument NV
unsigned arg_size() const
Definition: CallSite.h:219
CallingConv::ID getCallingConv() const
Get the calling convention of the call.
Definition: CallSite.h:312
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index&#39;s element.
Definition: Constants.cpp:2645
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:188
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
Definition: CallSite.h:585
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMax(const Opnd0 &Op0, const Opnd1 &Op1)
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:262
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
An instruction for ordering other memory operations.
Definition: Instructions.h:440
match_zero m_Zero()
Match an arbitrary zero/null constant.
Definition: PatternMatch.h:145
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:446
Instruction * visitVACopyInst(VACopyInst &I)
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1237
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC)
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:641
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:239
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:91
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:617
iterator_range< IterTy > args() const
Definition: CallSite.h:215
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
bool hasValueHandle() const
Return true if there is a value handle associated with this value.
Definition: Value.h:481
unsigned less or equal
Definition: InstrTypes.h:886
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
unsigned less than
Definition: InstrTypes.h:885
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:697
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
This class wraps the llvm.memset intrinsic.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:816
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:818
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1386
bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr)
Return true if it is valid to use the assumptions provided by an assume intrinsic, I, at the point in the control-flow identified by the context instruction, CxtI.
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:862
F(f)
static CallInst * Create(Value *Func, ArrayRef< Value *> Args, ArrayRef< OperandBundleDef > Bundles=None, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
const fltSemantics & getSemantics() const
Definition: APFloat.h:1140
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:520
An instruction for reading from memory.
Definition: Instructions.h:164
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:883
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:1832
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:168
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
static OverflowCheckFlavor IntrinsicIDToOverflowCheckFlavor(unsigned ID)
Returns the OverflowCheckFlavor corresponding to an overflow_with_op intrinsic.
fneg_match< LHS > m_FNeg(const LHS &L)
Match a floating point negate.
void reserve(size_type N)
Definition: SmallVector.h:380
static Instruction * simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC)
Instruction * visitVAStartInst(VAStartInst &I)
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:528
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
bool isGCRelocate(ImmutableCallSite CS)
Definition: Statepoint.cpp:43
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
const CallInst * isFreeCall(const Value *I, const TargetLibraryInfo *TLI)
isFreeCall - Returns non-null if the value is a call to the builtin free()
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:207
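As a small assumed usage example (the wrapper and the choice of i32 are hypothetical):

// Hypothetical example: materialize an i32 zero constant.
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

llvm::Constant *zeroOfI32(llvm::LLVMContext &Ctx) {
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  return llvm::Constant::getNullValue(I32); // i32 0
}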
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:138
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one. ...
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:968
static Instruction * SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
Instruction * visitInvokeInst(InvokeInst &II)
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:1518
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:515
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Type * getPointerElementType() const
Definition: Type.h:373
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:958
OverflowCheckFlavor
Specific patterns of overflow check idioms that we match.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getNumArgOperands() const
Return the number of call arguments.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:981
This class wraps the llvm.memmove intrinsic.
AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const
Add an argument attribute to the list.
Definition: Attributes.h:398
Value * SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const SimplifyQuery &Q)
Given a function and iterators over arguments, fold the result or return null.
IterTy arg_end() const
Definition: CallSite.h:557
Instruction * eraseInstFromFunction(Instruction &I)
Combiner aware instruction erasure.
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:912
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:664
The core instruction combiner logic.
static bool isSafeToEliminateVarargsCast(const CallSite CS, const DataLayout &DL, const CastInst *const CI, const int ix)
If this cast does not affect the value passed through the varargs area, we can eliminate the use of t...
bool hasUnsafeAlgebra() const
Determine whether the unsafe-algebra flag is set.
InstrTy * getInstruction() const
Definition: CallSite.h:92
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1556
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:284
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:966
This file implements a class to represent arbitrary precision integral constant values and operations...
not_match< LHS > m_Not(const LHS &L)
Definition: PatternMatch.h:985
All zero aggregate value.
Definition: Constants.h:332
static Value * simplifyX86vpermv(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
Metadata * LowAndHigh[]
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:100
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
DominatorTree & getDominatorTree() const
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:193
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:86
Class to represent function types.
Definition: DerivedTypes.h:103
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1444
bool isInfinity() const
Definition: APFloat.h:1129
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1405
This represents the llvm.va_start intrinsic.
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Matches FPExt.
Definition: PatternMatch.h:955
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4438
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:924
AttributeSet getParamAttributes(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
bool isVarArg() const
Definition: DerivedTypes.h:123
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the call or the callee has the given attribute.
Definition: CallSite.h:377
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:194
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
Definition: IRBuilder.h:1835
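A minimal sketch of the splat helper, assuming an IRBuilder already positioned at an insertion point; the names are illustrative:

// Hypothetical example: broadcast a scalar into a 4-element vector.
#include "llvm/IR/IRBuilder.h"

llvm::Value *splat4(llvm::IRBuilder<> &Builder, llvm::Value *Scalar) {
  // Emits an insertelement followed by a shufflevector with a zero mask.
  return Builder.CreateVectorSplat(4, Scalar, "splat");
}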
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
AttrBuilder & remove(const AttrBuilder &B)
Remove the attributes from the builder.
static Value * simplifyX86pack(IntrinsicInst &II, bool IsSigned)
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:205
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:166
An instruction for storing to memory.
Definition: Instructions.h:306
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1328
SelectClass_match< Cond, LHS, RHS > m_Select(const Cond &C, const LHS &L, const RHS &R)
Definition: PatternMatch.h:869
static void ValueIsRAUWd(Value *Old, Value *New)
Definition: Value.cpp:877
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1383
static Value * simplifyX86vpcom(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned)
Decode XOP integer vector comparison intrinsics.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:290
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:408
static Value * simplifyX86movmsk(const IntrinsicInst &II)
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:975
This class represents a truncation of integer types.
static unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:190
Type * getElementType() const
Return the element type of the array/vector.
Definition: Constants.cpp:2271
Value * getOperand(unsigned i) const
Definition: User.h:154
Class to represent pointers.
Definition: DerivedTypes.h:467
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
Definition: Attributes.cpp:573
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:277
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:301
const DataLayout & getDataLayout() const
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1678
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:141
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:602
bool hasAttrSomewhere(Attribute::AttrKind Kind, unsigned *Index=nullptr) const
Return true if the specified attribute is set for at least one parameter or for the return value...
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:63
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1164
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:357
void setAttributes(AttributeList PAL)
Set the parameter attributes of the call.
Definition: CallSite.h:333
Instruction * visitFenceInst(FenceInst &FI)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static Instruction * simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC)
const Value * getCalledValue() const
Get a pointer to the function that is invoked by this instruction.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:503
bool isNegative() const
Definition: APFloat.h:1132
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:281
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1306
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1045
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:421
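For illustration, a few assumed sample values (the wrapper function is not from this file):

// Hypothetical example: power-of-two checks from MathExtras.h.
#include "llvm/Support/MathExtras.h"
#include <cassert>

void powerOfTwoExample() {
  assert(llvm::isPowerOf2_32(64));  // 64 == 1u << 6
  assert(!llvm::isPowerOf2_32(0));  // zero is excluded by definition
  assert(!llvm::isPowerOf2_32(48)); // 48 has two bits set
}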
ConstantInt * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to .objectsize into an integer value of the given Type.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:580
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:288
bool isNaN() const
Definition: APFloat.h:1130
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:1689
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
static cl::opt< unsigned > UnfoldElementAtomicMemcpyMaxElements("unfold-element-atomic-memcpy-max-elements", cl::init(16), cl::desc("Maximum number of elements in atomic memcpy the optimizer is " "allowed to unfold"))
unsigned getNumParams() const
Return the number of fixed parameters this function type requires.
Definition: DerivedTypes.h:139
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:382
unsigned getParamAlignment(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
This file declares a class to represent arbitrary precision floating point values and provide a varie...
std::underlying_type< E >::type Underlying(E Val)
Check that Val is in range for E, and return Val cast to E's underlying type.
Definition: BitmaskEnum.h:91
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:860
static const unsigned End
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:931
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:78
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Definition: CallSite.h:316
bool isGCResult(ImmutableCallSite CS)
Definition: Statepoint.cpp:53
static FunctionType * get(Type *Result, ArrayRef< Type *> Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:297
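A hedged sketch of constructing a function type; the chosen signature i32 (i8*, i64) is arbitrary:

// Hypothetical example: build the type 'i32 (i8*, i64)'.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

llvm::FunctionType *exampleFnTy(llvm::LLVMContext &Ctx) {
  llvm::Type *Params[] = {llvm::Type::getInt8PtrTy(Ctx),
                          llvm::Type::getInt64Ty(Ctx)};
  return llvm::FunctionType::get(llvm::Type::getInt32Ty(Ctx), Params,
                                 /*isVarArg=*/false);
}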
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
bool isIntN(unsigned N) const
Check if this APInt has an N-bit unsigned integer value.
Definition: APInt.h:443
void setNotConvergent()
Definition: CallSite.h:509
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:61
void setAlignment(unsigned Align)
static Value * simplifyX86varShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1320
const AMDGPUAS & AS
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:527
bool isVolatile() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1214
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1223
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:933
static InvokeInst * Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value *> Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:959
static Value * simplifyX86muldq(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
signed greater than
Definition: InstrTypes.h:887
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:244
static Value * simplifyX86extrq(IntrinsicInst &II, Value *Op0, ConstantInt *CILength, ConstantInt *CIIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding or conversion to a shuffle...
bool doesNotThrow() const
Determine if the call cannot unwind.
const APFloat & getValueAPF() const
Definition: Constants.h:294
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:918
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:452
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:163
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:240
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:178
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
static PointerType * getInt1PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:216
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:251
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the generic address space (address sp...
Definition: DerivedTypes.h:482
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
static Value * simplifyX86vpermilvar(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermilvar* to shufflevector if the mask is constant.
iterator end()
Definition: BasicBlock.h:254
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
IterTy arg_begin() const
Definition: CallSite.h:553
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Type::subtype_iterator param_iterator
Definition: DerivedTypes.h:126
bool overlaps(const AttrBuilder &B) const
Return true if the builder has any attribute that's in the specified builder.
static Instruction * simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC)
void setDoesNotThrow()
Definition: CallSite.h:490
signed less than
Definition: InstrTypes.h:889
Type * getReturnType() const
Definition: DerivedTypes.h:124
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:374
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1190
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1736
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:560
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:574
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:623
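A small assumed example of the scalar and splat forms of these constant factories (names and values are illustrative):

// Hypothetical example: scalar, splatted, and floating-point constants.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

void constantExamples(llvm::LLVMContext &Ctx) {
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  llvm::Constant *FortyTwo = llvm::ConstantInt::get(I32, 42);     // i32 42
  llvm::Constant *Splat =
      llvm::ConstantInt::get(llvm::VectorType::get(I32, 4), 42);  // <4 x i32> splat of 42
  llvm::Constant *Pi = llvm::ConstantFP::get(llvm::Type::getDoubleTy(Ctx), 3.14);
  (void)FortyTwo; (void)Splat; (void)Pi;
}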
#define NC
Definition: regutils.h:42
CallInst * CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:353
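A minimal sketch of emitting a masked load, assuming Ptr, Mask, and the vector type are already available; the 16-byte alignment is an arbitrary choice:

// Hypothetical example: masked load with a zero pass-through vector.
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"

llvm::CallInst *maskedLoadExample(llvm::IRBuilder<> &Builder, llvm::Value *Ptr,
                                  llvm::Value *Mask, llvm::Type *VecTy) {
  llvm::Value *PassThru = llvm::Constant::getNullValue(VecTy);
  // Lanes whose mask bit is false receive the matching PassThru element.
  return Builder.CreateMaskedLoad(Ptr, /*Align=*/16, Mask, PassThru, "mload");
}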
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1272
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:516
bool isDenormal() const
Definition: APFloat.h:1133
void setOperand(unsigned i, Value *Val)
Definition: User.h:159
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
This class represents atomic memcpy intrinsic TODO: Integrate this class into MemIntrinsic hierarchy;...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
signed less or equal
Definition: InstrTypes.h:890
Class to represent vector types.
Definition: DerivedTypes.h:393
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:57
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:395
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1008
static Value * simplifyX86pshufb(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert pshufb* to shufflevector if the mask is constant.
static cl::opt< bool > FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), cl::init(false))
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:333
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate IT block based on arch"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT, "arm-no-restrict-it", "Allow IT blocks based on ARMv7")))
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:405
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
Definition: PatternMatch.h:407
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:364
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:529
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:185
static Value * simplifyMinnumMaxnum(const IntrinsicInst &II)
void setCalledFunction(Value *Fn)
Set the function called.
This class wraps the llvm.memcpy/memmove intrinsics.
static Value * simplifyMaskedLoad(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Instruction * simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:284
static bool maskIsAllOneOrUndef(Value *Mask)
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
OverflowResult
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:195
unsigned greater or equal
Definition: InstrTypes.h:884
match_one m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:194
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Definition: CallSite.h:564
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:218
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: CallSite.h:487
void setArgOperand(unsigned i, Value *v)
bool isNormal() const
Definition: APFloat.h:1136
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
Value * optimizeCall(CallInst *CI)
optimizeCall - Take the given call instruction and return a more optimal value to replace the instruc...
static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID, unsigned EndID, InstCombiner &IC)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Type * getValueType() const
Definition: GlobalValue.h:262
static IntrinsicInst * findInitTrampoline(Value *Callee)
bool isByValOrInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed by value or in an alloca.
Definition: CallSite.h:590
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:81
AssumptionCache & getAssumptionCache() const
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bit signed integer value.
Definition: APInt.h:449
static PointerType * getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS=0)
Definition: Type.cpp:212
static Value * simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, APInt APLength, APInt APIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant folding or conversion to a shu...
bool isStatepoint(ImmutableCallSite CS)
Definition: Statepoint.cpp:27
static Constant * getNegativeIsTrueBoolVec(ConstantDataVector *V)
Return a constant boolean vector that has true elements in all positions where the input constant dat...
iterator_range< op_iterator > arg_operands()
Iteration adapter for range-for loops.
static Value * emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1, InstCombiner::BuilderTy &Builder)
This represents the llvm.va_copy intrinsic.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:538
match_all_ones m_AllOnes()
Match an integer or vector with all bits set to true.
Definition: PatternMatch.h:205
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
LoadInst * CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name)
Definition: IRBuilder.h:1182
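For illustration, an assumed call using this overload; the 8-byte alignment and the result name are arbitrary:

// Hypothetical example: load through a pointer with an explicit alignment.
#include "llvm/IR/IRBuilder.h"

llvm::LoadInst *alignedLoadExample(llvm::IRBuilder<> &Builder, llvm::Value *Ptr) {
  return Builder.CreateAlignedLoad(Ptr, 8, "val");
}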
static Instruction * foldCtpop(IntrinsicInst &II, InstCombiner &IC)
Value * getLength() const
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
void setAlignment(unsigned Align)
This file provides internal interfaces used to implement the InstCombine.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
Value * getRawSource() const
Return the arguments to the instruction.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
AttrBuilder typeIncompatible(Type *Ty)
Which attributes cannot be applied to a type.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
AttributeSet getFnAttributes() const
The function attributes are returned.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:270
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1027
Invoke instruction.
#define DEBUG(X)
Definition: Debug.h:118
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:148
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Return true if the given value is known to be non-zero when defined.
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:408
unsigned greater than
Definition: InstrTypes.h:883
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:974
void addAttribute(unsigned i, Attribute::AttrKind Kind)
adds the attribute to the list of attributes.
AttributeList getAttributes() const
Get the parameter attributes of the call.
Definition: CallSite.h:329
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2294
bool isConvergent() const
Determine if the call is convergent.
Definition: CallSite.h:503
static APInt getNullValue(unsigned numBits)
Get the '0' value.
Definition: APInt.h:562
match_nan m_NaN()
Match an arbitrary NaN constant. This includes quiet and signalling nans.
Definition: PatternMatch.h:183
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
static Constant * getMul(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2137
static Value * simplifyX86insertps(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
This class represents an extension of floating point types.
bool isEmpty() const
Return true if there are no attributes.
Definition: Attributes.h:646
Root of the metadata hierarchy.
Definition: Metadata.h:58
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
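A hedged sketch of querying known bits; the predicate and its threshold are illustrative, not taken from this file:

// Hypothetical example: prove that the low bit of V is zero.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/KnownBits.h"

bool lowBitKnownZero(const llvm::Value *V, const llvm::DataLayout &DL) {
  llvm::KnownBits Known(V->getType()->getScalarSizeInBits());
  llvm::computeKnownBits(V, Known, DL);
  // countMinTrailingZeros() >= 1 holds, e.g., for 'shl X, 1' or 'and X, -2'.
  return Known.countMinTrailingZeros() >= 1;
}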
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
void setCalledFunction(Value *V)
Set the callee to the specified value.
Definition: CallSite.h:126
bool isSignaling() const
Definition: APFloat.h:1134
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass though a va_arg area.
bool use_empty() const
Definition: Value.h:322
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:984
Type * getElementType() const
Definition: DerivedTypes.h:486
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1212
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:260
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:359
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute >> Attrs)
Create an AttributeList with the specified parameters in it.
Definition: Attributes.cpp:870
bool isLosslessCast() const
A lossless cast is one that does not alter the basic value.
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:399
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:215
signed greater or equal
Definition: InstrTypes.h:888
User * user_back()
Definition: Value.h:381
cmpResult compare(const APFloat &RHS) const
Definition: APFloat.h:1102
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:66
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:837
CallInst * CreateCall(Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1659